LLVM 23.0.0git
RISCVISelLowering.cpp
Go to the documentation of this file.
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
// Statistic counter (reported via -stats): number of calls this backend
// lowered as tail calls.
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
// Command-line knob (riscv-lower-form-vw-w-with-splat): when true, the
// DAG combiner may form widening .W vector ops (e.g. VWADD_W) even when one
// operand is a splat constant. Off by default.
66static cl::opt<bool>
67    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68                     cl::desc("Allow the formation of VW_W operations (e.g., "
69                              "VWADD_W) with splat constants"),
70                     cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
// Command-line knob (reassoc-shl-addi-add): allows reassociating
// add/addi so that the add can later fold into a shift-add pattern.
// Enabled by default.
84static cl::opt<bool>
85    ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86                      cl::desc("Swap add and addi in cases where the add may "
87                               "be combined with a shift"),
88                      cl::init(true));
89
// VP (vector-predicated) opcodes listed here are the sign-manipulation float
// ops; presumably they are the ones handled natively for bf16 vectors when
// the Zvfbfa extension is available (the consumer of this table is outside
// this excerpt — confirm against SetZvfbfaActions).
90// TODO: Support more ops
91static const unsigned ZvfbfaVPOps[] = {
92    ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
99
101 const RISCVSubtarget &STI)
102 : TargetLowering(TM, STI), Subtarget(STI) {
103
104 RISCVABI::ABI ABI = Subtarget.getTargetABI();
105 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
106
107 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
108 !Subtarget.hasStdExtF()) {
109 errs() << "Hard-float 'f' ABI can't be used for a target that "
110 "doesn't support the F instruction set extension (ignoring "
111 "target-abi)\n";
112 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
113 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
114 !Subtarget.hasStdExtD()) {
115 errs() << "Hard-float 'd' ABI can't be used for a target that "
116 "doesn't support the D instruction set extension (ignoring "
117 "target-abi)\n";
118 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
119 }
120
121 switch (ABI) {
122 default:
123 reportFatalUsageError("Don't know how to lower this ABI");
132 break;
133 }
134
135 MVT XLenVT = Subtarget.getXLenVT();
136
137 // Set up the register classes.
138 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
139
140 if (Subtarget.hasStdExtZfhmin())
141 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
142 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
143 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
144 if (Subtarget.hasStdExtF())
145 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
146 if (Subtarget.hasStdExtD())
147 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
148 if (Subtarget.hasStdExtZhinxmin())
149 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
150 if (Subtarget.hasStdExtZfinx())
151 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
152 if (Subtarget.hasStdExtZdinx()) {
153 if (Subtarget.is64Bit())
154 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
155 else
156 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
157 }
158
159 static const MVT::SimpleValueType BoolVecVTs[] = {
160 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
161 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
162 static const MVT::SimpleValueType IntVecVTs[] = {
163 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
164 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
165 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
166 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
167 MVT::nxv4i64, MVT::nxv8i64};
168 static const MVT::SimpleValueType F16VecVTs[] = {
169 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
170 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
171 static const MVT::SimpleValueType BF16VecVTs[] = {
172 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
173 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
174 static const MVT::SimpleValueType F32VecVTs[] = {
175 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
176 static const MVT::SimpleValueType F64VecVTs[] = {
177 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
178 static const MVT::SimpleValueType VecTupleVTs[] = {
179 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
180 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
181 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
182 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
183 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
184 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
185 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
186 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
187 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
188 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
189 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
190
191 if (Subtarget.hasVInstructions()) {
192 auto addRegClassForRVV = [this](MVT VT) {
193 // Disable the smallest fractional LMUL types if ELEN is less than
194 // RVVBitsPerBlock.
195 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
196 if (VT.getVectorMinNumElements() < MinElts)
197 return;
198
199 unsigned Size = VT.getSizeInBits().getKnownMinValue();
200 const TargetRegisterClass *RC;
202 RC = &RISCV::VRRegClass;
203 else if (Size == 2 * RISCV::RVVBitsPerBlock)
204 RC = &RISCV::VRM2RegClass;
205 else if (Size == 4 * RISCV::RVVBitsPerBlock)
206 RC = &RISCV::VRM4RegClass;
207 else if (Size == 8 * RISCV::RVVBitsPerBlock)
208 RC = &RISCV::VRM8RegClass;
209 else
210 llvm_unreachable("Unexpected size");
211
212 addRegisterClass(VT, RC);
213 };
214
215 for (MVT VT : BoolVecVTs)
216 addRegClassForRVV(VT);
217 for (MVT VT : IntVecVTs) {
218 if (VT.getVectorElementType() == MVT::i64 &&
219 !Subtarget.hasVInstructionsI64())
220 continue;
221 addRegClassForRVV(VT);
222 }
223
224 if (Subtarget.hasVInstructionsF16Minimal() ||
225 Subtarget.hasVendorXAndesVPackFPH())
226 for (MVT VT : F16VecVTs)
227 addRegClassForRVV(VT);
228
229 if (Subtarget.hasVInstructionsBF16Minimal() ||
230 Subtarget.hasVendorXAndesVBFHCvt())
231 for (MVT VT : BF16VecVTs)
232 addRegClassForRVV(VT);
233
234 if (Subtarget.hasVInstructionsF32())
235 for (MVT VT : F32VecVTs)
236 addRegClassForRVV(VT);
237
238 if (Subtarget.hasVInstructionsF64())
239 for (MVT VT : F64VecVTs)
240 addRegClassForRVV(VT);
241
242 if (Subtarget.useRVVForFixedLengthVectors()) {
243 auto addRegClassForFixedVectors = [this](MVT VT) {
244 MVT ContainerVT = getContainerForFixedLengthVector(VT);
245 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
246 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
247 addRegisterClass(VT, TRI.getRegClass(RCID));
248 };
250 if (useRVVForFixedLengthVectorVT(VT))
251 addRegClassForFixedVectors(VT);
252
254 if (useRVVForFixedLengthVectorVT(VT))
255 addRegClassForFixedVectors(VT);
256 }
257
258 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
266 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
267 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
268 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
269 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
270 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
271 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
272 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
273 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
274 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
275 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
276 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
277 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
278 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
279 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
280 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
281 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
282 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
283 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
284 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
285 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
286 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
287 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
288 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
289 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
290 }
291
292 // fixed vector is stored in GPRs for P extension packed operations
293 if (Subtarget.hasStdExtP()) {
294 if (Subtarget.is64Bit()) {
295 addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
296 addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
297 addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
298 } else {
299 addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
300 addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
301 }
302 }
303
304 // Compute derived properties from the register classes.
306
308
310 MVT::i1, Promote);
311 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
313 MVT::i1, Promote);
314
315 // TODO: add all necessary setOperationAction calls.
317
322
327 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
330 }
331
333
336
337 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
338 !Subtarget.hasVendorXAndesPerf())
340
342
343 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
344 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
345 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
346 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
347
348 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
351 }
352
353 if (Subtarget.is64Bit()) {
355
358 MVT::i32, Custom);
361 } else if (Subtarget.hasStdExtP()) {
362 // Custom legalize i64 ADD/SUB/SHL/SRL/SRA for RV32+P.
365 }
366 if (!Subtarget.hasStdExtZmmul()) {
368 } else if (Subtarget.is64Bit()) {
371 } else {
373 }
374
375 if (!Subtarget.hasStdExtM()) {
377 Expand);
378 } else if (Subtarget.is64Bit()) {
380 {MVT::i8, MVT::i16, MVT::i32}, Custom);
381 }
382
384
385 // On RV32, the P extension has a WMUL(U) instruction we can use for
386 // (S/U)MUL_LOHI.
387 // FIXME: Does P imply Zmmul?
388 if (!Subtarget.hasStdExtP() || !Subtarget.hasStdExtZmmul() ||
389 Subtarget.is64Bit())
391
393 Custom);
394
395 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
396 if (Subtarget.is64Bit())
398 } else if (Subtarget.hasVendorXTHeadBb()) {
399 if (Subtarget.is64Bit())
402 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
404 } else {
406 }
407
408 if (Subtarget.hasStdExtP())
410
412 Subtarget.hasREV8Like() ? Legal : Expand);
413
414 if (Subtarget.hasREVLike()) {
416 } else {
417 // Zbkb can use rev8+brev8 to implement bitreverse.
419 Subtarget.hasStdExtZbkb() ? Custom : Expand);
420 if (Subtarget.hasStdExtZbkb())
422 }
423
424 if (Subtarget.hasStdExtZbb() ||
425 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
427 Legal);
428 }
429
430 if (Subtarget.hasCTZLike()) {
431 if (Subtarget.is64Bit())
433 } else {
435 }
436
437 if (!Subtarget.hasCPOPLike()) {
438 // TODO: These should be set to LibCall, but this currently breaks
439 // the Linux kernel build. See #101786. Lacks i128 tests, too.
440 if (Subtarget.is64Bit())
442 else
445 }
446
447 if (Subtarget.hasCLZLike()) {
448 // We need the custom lowering to make sure that the resulting sequence
449 // for the 32bit case is efficient on 64bit targets.
450 // Use default promotion for i32 without Zbb.
451 if (Subtarget.is64Bit() &&
452 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
454 } else {
456 }
457
458 if (Subtarget.hasStdExtP()) {
460 if (Subtarget.is64Bit())
462 }
463
464 if (Subtarget.hasStdExtP() ||
465 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
467 if (Subtarget.is64Bit())
469 } else if (Subtarget.hasShortForwardBranchIALU()) {
470 // We can use PseudoCCSUB to implement ABS.
472 } else if (Subtarget.is64Bit()) {
474 }
475
476 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
478
479 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
480 !Subtarget.is64Bit()) {
482 MVT::i32, Legal);
483 } else if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) {
485 MVT::i32, Custom);
486 } else if (!Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) {
488 MVT::i32, Custom);
489 }
490
491 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
493 }
494
495 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
496 !Subtarget.is64Bit()) {
497 // FIXME: Support i32 on RV64+P by inserting into a v2i32 vector, doing
498 // pssha.w and extracting.
500 }
501
502 if (Subtarget.hasStdExtZbc() || Subtarget.hasStdExtZbkc())
504 if (Subtarget.hasStdExtZbc())
506
507 static const unsigned FPLegalNodeTypes[] = {
515
516 static const ISD::CondCode FPCCToExpand[] = {
520
521 static const unsigned FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
522 ISD::FPOW};
523 static const unsigned FPOpToLibCall[] = {ISD::FREM};
524
525 static const unsigned FPRndMode[] = {
528
529 static const unsigned ZfhminZfbfminPromoteOps[] = {
539
540 if (Subtarget.hasStdExtP()) {
542 static const MVT RV32VTs[] = {MVT::v2i16, MVT::v4i8};
543 static const MVT RV64VTs[] = {MVT::v2i32, MVT::v4i16, MVT::v8i8};
544 ArrayRef<MVT> VTs;
545 if (Subtarget.is64Bit()) {
546 VTs = RV64VTs;
547 // There's no instruction for vector shamt in P extension so we unroll to
548 // scalar instructions. Vector VTs that are 32-bit are widened to 64-bit
549 // vector, e.g. v2i16 -> v4i16, before getting unrolled, so we need custom
550 // widen for those operations that will be unrolled.
552 {MVT::v2i16, MVT::v4i8}, Custom);
553 } else {
554 VTs = RV32VTs;
555 }
556 // By default everything must be expanded.
557 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
559
560 for (MVT VT : VTs) {
562 setTruncStoreAction(VT, OtherVT, Expand);
564 OtherVT, Expand);
565 }
566 }
567
577 for (MVT VT : VTs) {
578 if (VT != MVT::v2i32)
580 if (VT.getVectorElementType() != MVT::i8)
582 }
589 Custom);
591 Legal);
597 VTs, Expand);
598
599 if (!Subtarget.is64Bit())
601
602 // P extension vector comparisons produce all 1s for true, all 0s for false
604 }
605
606 if (Subtarget.hasStdExtZfbfmin()) {
612 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
619 }
620
621 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
622 if (Subtarget.hasStdExtZfhOrZhinx()) {
623 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
624 setOperationAction(FPRndMode, MVT::f16,
625 Subtarget.hasStdExtZfa() ? Legal : Custom);
628 Subtarget.hasStdExtZfa() ? Legal : Custom);
629 if (Subtarget.hasStdExtZfa())
631 } else {
632 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
637 setOperationAction(Op, MVT::f16, Custom);
643 }
644
645 if (!Subtarget.hasStdExtD()) {
646 // FIXME: handle f16 fma when f64 is not legal. Using an f32 fma
647 // instruction runs into double rounding issues, so this is wrong.
648 // Normally we'd use an f64 fma, but without the D extension the f64 type
649 // is not legal. This should probably be a libcall.
650 AddPromotedToType(ISD::FMA, MVT::f16, MVT::f32);
651 AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f32);
652 }
653
655
658 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
662
664 ISD::FNEARBYINT, MVT::f16,
665 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
670 MVT::f16, Promote);
671
672 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
673 // complete support for all operations in LegalizeDAG.
678 MVT::f16, Promote);
679
680 // We need to custom promote this.
681 if (Subtarget.is64Bit())
683 }
684
685 if (Subtarget.hasStdExtFOrZfinx()) {
686 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
687 setOperationAction(FPRndMode, MVT::f32,
688 Subtarget.hasStdExtZfa() ? Legal : Custom);
689 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
693 setOperationAction(FPOpToExpand, MVT::f32, Expand);
694 setOperationAction(FPOpToLibCall, MVT::f32, LibCall);
695 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
696 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
697 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
698 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
702 Subtarget.isSoftFPABI() ? LibCall : Custom);
707
708 if (Subtarget.hasStdExtZfa()) {
712 } else {
714 }
715 }
716
717 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
719
720 if (Subtarget.hasStdExtDOrZdinx()) {
721 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
722
723 if (!Subtarget.is64Bit())
725
726 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
727 !Subtarget.is64Bit()) {
730 }
731
732 if (Subtarget.hasStdExtZfa()) {
734 setOperationAction(FPRndMode, MVT::f64, Legal);
737 } else {
738 if (Subtarget.is64Bit())
739 setOperationAction(FPRndMode, MVT::f64, Custom);
740
742 }
743
746 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
750 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
751 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
752 setOperationAction(FPOpToExpand, MVT::f64, Expand);
753 setOperationAction(FPOpToLibCall, MVT::f64, LibCall);
754 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
755 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
756 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
757 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
761 Subtarget.isSoftFPABI() ? LibCall : Custom);
766 }
767
768 if (Subtarget.is64Bit()) {
771 MVT::i32, Custom);
773 }
774
775 if (Subtarget.hasStdExtFOrZfinx()) {
777 Custom);
778
779 // f16/bf16 require custom handling.
781 Custom);
783 Custom);
784
793 }
794
797 XLenVT, Custom);
798
800
801 if (Subtarget.is64Bit())
803
804 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
805 // Unfortunately this can't be determined just from the ISA naming string.
807 Subtarget.is64Bit() ? Legal : Custom);
809 Subtarget.is64Bit() ? Legal : Custom);
810
811 if (Subtarget.is64Bit()) {
814 }
815
818 if (Subtarget.is64Bit())
820
821 if (Subtarget.hasVendorXMIPSCBOP())
823 else if (Subtarget.hasStdExtZicbop())
825
826 if (Subtarget.hasStdExtZalrsc()) {
827 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
828 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
830 else
832 } else if (Subtarget.hasForcedAtomics()) {
833 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
834 } else {
836 }
837
839
841
842 if (getTargetMachine().getTargetTriple().isOSLinux()) {
843 // Custom lowering of llvm.clear_cache.
845 }
846
847 if (Subtarget.hasVInstructions()) {
849
851
852 // RVV intrinsics may have illegal operands.
853 // We also need to custom legalize vmv.x.s.
856 {MVT::i8, MVT::i16}, Custom);
857 if (Subtarget.is64Bit())
859 MVT::i32, Custom);
860 else
862 MVT::i64, Custom);
863
865 MVT::Other, Custom);
866
867 static const unsigned IntegerVPOps[] = {
868 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
869 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
870 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
871 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
872 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
873 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
874 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
875 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
876 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
877 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
878 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
879 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
880 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
881 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
882
883 static const unsigned FloatingPointVPOps[] = {
884 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
885 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
886 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
887 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
888 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
889 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
890 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
891 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
892 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
893 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
894 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
895 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
896 ISD::VP_REDUCE_FMAXIMUM};
897
898 static const unsigned IntegerVecReduceOps[] = {
902
903 static const unsigned FloatingPointVecReduceOps[] = {
906
907 static const unsigned FloatingPointLibCallOps[] = {
910
911 if (!Subtarget.is64Bit()) {
912 // We must custom-lower certain vXi64 operations on RV32 due to the vector
913 // element type being illegal.
915 MVT::i64, Custom);
916
917 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
918
919 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
920 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
921 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
922 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
923 MVT::i64, Custom);
924 }
925
926 for (MVT VT : BoolVecVTs) {
927 if (!isTypeLegal(VT))
928 continue;
929
931
932 // Mask VTs are custom-expanded into a series of standard nodes
936 VT, Custom);
937
939 Custom);
940
942 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
943 Expand);
944 setOperationAction(ISD::VP_MERGE, VT, Custom);
945
946 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
947 Custom);
948
949 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
950
953 Custom);
954
956 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
957 Custom);
958
959 // RVV has native int->float & float->int conversions where the
960 // element type sizes are within one power-of-two of each other. Any
961 // wider distances between type sizes have to be lowered as sequences
962 // which progressively narrow the gap in stages.
967 VT, Custom);
969 Custom);
970
971 // Expand all extending loads to types larger than this, and truncating
972 // stores from types larger than this.
974 setTruncStoreAction(VT, OtherVT, Expand);
976 OtherVT, Expand);
977 }
978
979 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
980 ISD::VP_TRUNCATE, ISD::VP_SETCC},
981 VT, Custom);
982
985
987
988 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
989 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
990
993 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
994 }
995
996 for (MVT VT : IntVecVTs) {
997 if (!isTypeLegal(VT))
998 continue;
999
1002
1003 // Vectors implement MULHS/MULHU.
1005
1006 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1007 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
1009
1011 Legal);
1012
1013 if (Subtarget.hasStdExtZvabd()) {
1015 // Only SEW=8/16 are supported in Zvabd.
1016 if (VT.getVectorElementType() == MVT::i8 ||
1017 VT.getVectorElementType() == MVT::i16)
1019 else
1021 } else
1023
1024 // Custom-lower extensions and truncations from/to mask types.
1026 VT, Custom);
1027
1028 // RVV has native int->float & float->int conversions where the
1029 // element type sizes are within one power-of-two of each other. Any
1030 // wider distances between type sizes have to be lowered as sequences
1031 // which progressively narrow the gap in stages.
1036 VT, Custom);
1038 Custom);
1042 VT, Legal);
1043
1044 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
1045 // nodes which truncate by one power of two at a time.
1048 Custom);
1049
1050 // Custom-lower insert/extract operations to simplify patterns.
1052 Custom);
1053
1054 // Custom-lower reduction operations to set up the corresponding custom
1055 // nodes' operands.
1056 setOperationAction(IntegerVecReduceOps, VT, Custom);
1057
1058 setOperationAction(IntegerVPOps, VT, Custom);
1059
1061
1063 VT, Custom);
1064
1066 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1067 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1068 VT, Custom);
1069 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1070
1073 VT, Custom);
1074
1077
1079
1081 setTruncStoreAction(VT, OtherVT, Expand);
1083 OtherVT, Expand);
1084 }
1085
1088
1090 VT, Custom);
1091
1092 if (Subtarget.hasStdExtZvkb()) {
1094 setOperationAction(ISD::VP_BSWAP, VT, Custom);
1095 } else {
1096 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
1098 }
1099
1100 if (Subtarget.hasStdExtZvbb()) {
1102 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
1103 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1104 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1105 VT, Custom);
1106 } else {
1107 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
1109 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1110 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1111 VT, Expand);
1112
1113 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1114 // range of f32.
1115 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1116 if (isTypeLegal(FloatVT)) {
1118 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
1119 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
1120 VT, Custom);
1121 }
1122 }
1123
1124 if (VT.getVectorElementType() == MVT::i64) {
1125 if (Subtarget.hasStdExtZvbc())
1127 } else {
1128 if (Subtarget.hasStdExtZvbc32e()) {
1130 } else if (Subtarget.hasStdExtZvbc()) {
1131 // Promote to i64 if the lmul is small enough.
1132 // FIXME: Split if necessary to widen.
1133 // FIXME: Promote clmulh directly without legalizing to clmul first.
1134 MVT I64VecVT = MVT::getVectorVT(MVT::i64, VT.getVectorElementCount());
1135 if (isTypeLegal(I64VecVT))
1137 }
1138 }
1139
1141 }
1142
1143 for (MVT VT : VecTupleVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146
1148 }
1149
1150 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1151 // but no other unordered comparisons, and supports all ordered comparisons
1152 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1153 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1154 // and we pattern-match those back to the "original", swapping operands once
1155 // more. This way we catch both operations and both "vf" and "fv" forms with
1156 // fewer patterns.
1157 static const ISD::CondCode VFPCCToExpand[] = {
1161 };
1162
1163 // TODO: support more ops.
1164 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1169 ISD::FADD,
1170 ISD::FSUB,
1171 ISD::FMUL,
1172 ISD::FMA,
1173 ISD::FDIV,
1174 ISD::FSQRT,
1175 ISD::FCEIL,
1180 ISD::FRINT,
1183 ISD::SETCC,
1196
1197 // TODO: Make more of these ops legal.
1198 static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
1199 ISD::FSQRT,
1200 ISD::FCEIL,
1205 ISD::FRINT,
1213
1214 // TODO: support more vp ops.
1215 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1216 ISD::VP_FADD,
1217 ISD::VP_FSUB,
1218 ISD::VP_FMUL,
1219 ISD::VP_FDIV,
1220 ISD::VP_FMA,
1221 ISD::VP_REDUCE_FMIN,
1222 ISD::VP_REDUCE_FMAX,
1223 ISD::VP_SQRT,
1224 ISD::VP_FMINNUM,
1225 ISD::VP_FMAXNUM,
1226 ISD::VP_FCEIL,
1227 ISD::VP_FFLOOR,
1228 ISD::VP_FROUND,
1229 ISD::VP_FROUNDEVEN,
1230 ISD::VP_FROUNDTOZERO,
1231 ISD::VP_FRINT,
1232 ISD::VP_FNEARBYINT,
1233 ISD::VP_SETCC,
1234 ISD::VP_FMINIMUM,
1235 ISD::VP_FMAXIMUM,
1236 ISD::VP_REDUCE_FMINIMUM,
1237 ISD::VP_REDUCE_FMAXIMUM};
1238
1239 // Sets common operation actions on RVV floating-point vector types.
1240 const auto SetCommonVFPActions = [&](MVT VT) {
1242 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1243 // sizes are within one power-of-two of each other. Therefore conversions
1244 // between vXf16 and vXf64 must be lowered as sequences which convert via
1245 // vXf32.
1249 // Custom-lower insert/extract operations to simplify patterns.
1251 Custom);
1252 // Expand various condition codes (explained above).
1253 setCondCodeAction(VFPCCToExpand, VT, Expand);
1254
1257 Legal);
1259
1263 VT, Custom);
1264
1265 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1266
1267 // Expand FP operations that need libcalls.
1268 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1269
1271
1273
1275 VT, Custom);
1276
1278 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1279 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1280 VT, Custom);
1281 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1282
1285
1288 VT, Custom);
1289
1292
1295 VT, Custom);
1296 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1297 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1298
1299 setOperationAction(FloatingPointVPOps, VT, Custom);
1300
1302 Custom);
1305 VT, Legal);
1310 VT, Custom);
1311
1313 };
1314
1315 // Sets common extload/truncstore actions on RVV floating-point vector
1316 // types.
1317 const auto SetCommonVFPExtLoadTruncStoreActions =
1318 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1319 for (auto SmallVT : SmallerVTs) {
1320 setTruncStoreAction(VT, SmallVT, Expand);
1321 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1322 }
1323 };
1324
1325 // Sets common actions for f16 and bf16 for when there's only
1326 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1327 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1330 Custom);
1331 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1334 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1335 Custom);
1337 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1343 VT, Custom);
1344 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1345 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1346 MVT EltVT = VT.getVectorElementType();
1347 if (isTypeLegal(EltVT))
1349 VT, Custom);
1350 else
1353 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1354 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1355 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1356 ISD::VP_SCATTER},
1357 VT, Custom);
1358 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1359
1363
1364 // Expand FP operations that need libcalls.
1365 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1366
1367 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1368 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1369 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1370 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1371 } else {
1372 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1373 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1374 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1375 }
1376 };
1377
1378 // Sets common actions for zvfbfa, some of instructions are supported
1379 // natively so that we don't need to promote them.
1380 const auto SetZvfbfaActions = [&](MVT VT) {
1383 Custom);
1384 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1387 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1388 Custom);
1390 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1397 VT, Custom);
1400 Legal);
1403 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1404 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1405
1410 VT, Legal);
1412 setCondCodeAction(VFPCCToExpand, VT, Expand);
1413
1415 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1416 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1417 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1418 ISD::VP_SCATTER},
1419 VT, Custom);
1420 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1421
1422 // Expand FP operations that need libcalls.
1423 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1424
1425 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1426 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1427 setOperationAction(ZvfbfaPromoteOps, VT, Custom);
1428 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1429 } else {
1430 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1431 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1432 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1433 }
1434 };
1435
1436 if (Subtarget.hasVInstructionsF16()) {
1437 for (MVT VT : F16VecVTs) {
1438 if (!isTypeLegal(VT))
1439 continue;
1440 SetCommonVFPActions(VT);
1441 }
1442 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1443 for (MVT VT : F16VecVTs) {
1444 if (!isTypeLegal(VT))
1445 continue;
1446 SetCommonPromoteToF32Actions(VT);
1447 }
1448 }
1449
1450 if (Subtarget.hasVInstructionsBF16()) {
1451 for (MVT VT : BF16VecVTs) {
1452 if (!isTypeLegal(VT))
1453 continue;
1454 SetZvfbfaActions(VT);
1455 }
1456 } else if (Subtarget.hasVInstructionsBF16Minimal()) {
1457 for (MVT VT : BF16VecVTs) {
1458 if (!isTypeLegal(VT))
1459 continue;
1460 SetCommonPromoteToF32Actions(VT);
1461 }
1462 }
1463
1464 if (Subtarget.hasVInstructionsF32()) {
1465 for (MVT VT : F32VecVTs) {
1466 if (!isTypeLegal(VT))
1467 continue;
1468 SetCommonVFPActions(VT);
1469 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1470 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1471 }
1472 }
1473
1474 if (Subtarget.hasVInstructionsF64()) {
1475 for (MVT VT : F64VecVTs) {
1476 if (!isTypeLegal(VT))
1477 continue;
1478 SetCommonVFPActions(VT);
1479 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1480 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1481 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1482 }
1483 }
1484
1485 if (Subtarget.useRVVForFixedLengthVectors()) {
1487 if (!useRVVForFixedLengthVectorVT(VT))
1488 continue;
1489
1490 // By default everything must be expanded.
1491 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1494 setTruncStoreAction(VT, OtherVT, Expand);
1496 OtherVT, Expand);
1497 }
1498
1499 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1500 // expansion to a build_vector of 0s.
1502
1503 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1505 Custom);
1506
1509 Custom);
1510
1512 VT, Custom);
1513
1515 VT, Custom);
1516
1518
1520
1522
1524
1527 Custom);
1528
1530
1533 Custom);
1534
1536 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1537 Custom);
1538
1540 {
1549 },
1550 VT, Custom);
1552 Custom);
1553
1555
1556 // Operations below are different for between masks and other vectors.
1557 if (VT.getVectorElementType() == MVT::i1) {
1558 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1559 ISD::OR, ISD::XOR},
1560 VT, Custom);
1561
1562 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1563 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1564 VT, Custom);
1565
1566 setOperationAction(ISD::VP_MERGE, VT, Custom);
1567
1568 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1569 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1570 continue;
1571 }
1572
1573 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1574 // it before type legalization for i64 vectors on RV32. It will then be
1575 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1576 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1577 // improvements first.
1578 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1581
1582 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1584 }
1585
1588
1589 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1590 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1591 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1592 ISD::VP_SCATTER},
1593 VT, Custom);
1594 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1595
1599 VT, Custom);
1600
1603
1605
1606 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1607 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1609
1613 VT, Custom);
1614
1616
1619
1620 // Custom-lower reduction operations to set up the corresponding custom
1621 // nodes' operands.
1625 VT, Custom);
1626
1627 setOperationAction(IntegerVPOps, VT, Custom);
1628
1629 if (Subtarget.hasStdExtZvkb())
1631
1632 if (Subtarget.hasStdExtZvbb()) {
1635 VT, Custom);
1636 } else {
1637 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1638 // range of f32.
1639 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1640 if (isTypeLegal(FloatVT))
1643 Custom);
1644 }
1645
1647 }
1648
1650 // There are no extending loads or truncating stores.
1651 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1652 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1653 setTruncStoreAction(VT, InnerVT, Expand);
1654 }
1655
1656 if (!useRVVForFixedLengthVectorVT(VT))
1657 continue;
1658
1659 // By default everything must be expanded.
1660 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1662
1663 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1664 // expansion to a build_vector of 0s.
1666
1671 VT, Custom);
1672 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1673 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1674
1676 VT, Custom);
1677
1680 VT, Custom);
1681 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1682 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1683 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1684 VT, Custom);
1685 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1686
1689 Custom);
1690
1692
1693 if (VT.getVectorElementType() == MVT::f16 &&
1694 !Subtarget.hasVInstructionsF16()) {
1695 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1697 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1698 Custom);
1699 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1700 Custom);
1703 if (Subtarget.hasStdExtZfhmin()) {
1705 } else {
1706 // We need to custom legalize f16 build vectors if Zfhmin isn't
1707 // available.
1709 }
1713 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1714 // Don't promote f16 vector operations to f32 if f32 vector type is
1715 // not legal.
1716 // TODO: could split the f16 vector into two vectors and do promotion.
1717 if (!isTypeLegal(F32VecVT))
1718 continue;
1719 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1720 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1721 continue;
1722 }
1723
1724 if (VT.getVectorElementType() == MVT::bf16) {
1725 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1728 if (Subtarget.hasStdExtZfbfmin()) {
1730 } else {
1731 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1732 // available.
1734 }
1735 if (Subtarget.hasStdExtZvfbfa()) {
1738 setCondCodeAction(VFPCCToExpand, VT, Expand);
1739 }
1741 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1742 Custom);
1743 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1744 // Don't promote f16 vector operations to f32 if f32 vector type is
1745 // not legal.
1746 // TODO: could split the f16 vector into two vectors and do promotion.
1747 if (!isTypeLegal(F32VecVT))
1748 continue;
1749
1750 if (Subtarget.hasStdExtZvfbfa())
1751 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1752 else
1753 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1754 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1755 continue;
1756 }
1757
1759 Custom);
1760
1766 VT, Custom);
1767
1772 VT, Custom);
1773
1774 setCondCodeAction(VFPCCToExpand, VT, Expand);
1775
1778
1779 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1780
1781 setOperationAction(FloatingPointVPOps, VT, Custom);
1782
1789 VT, Custom);
1790 }
1791
1792 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1793 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1794 if (Subtarget.is64Bit())
1796 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1798 if (Subtarget.hasStdExtZfbfmin())
1800 if (Subtarget.hasStdExtFOrZfinx())
1802 if (Subtarget.hasStdExtDOrZdinx())
1804 }
1805 }
1806
1807 if (Subtarget.hasStdExtZaamo())
1809
1810 if (Subtarget.hasForcedAtomics()) {
1811 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1817 XLenVT, LibCall);
1818 }
1819
1820 if (Subtarget.hasVendorXTHeadMemIdx()) {
1821 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1822 setIndexedLoadAction(im, MVT::i8, Legal);
1823 setIndexedStoreAction(im, MVT::i8, Legal);
1824 setIndexedLoadAction(im, MVT::i16, Legal);
1825 setIndexedStoreAction(im, MVT::i16, Legal);
1826 setIndexedLoadAction(im, MVT::i32, Legal);
1827 setIndexedStoreAction(im, MVT::i32, Legal);
1828
1829 if (Subtarget.is64Bit()) {
1830 setIndexedLoadAction(im, MVT::i64, Legal);
1831 setIndexedStoreAction(im, MVT::i64, Legal);
1832 }
1833 }
1834 }
1835
1836 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1840
1844 }
1845
1846 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1847 if (Subtarget.hasStdExtZvdot4a8i() && Subtarget.getELen() >= 64) {
1848 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1851 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1852 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1853 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1854 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1855 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1856
1857 if (Subtarget.useRVVForFixedLengthVectors()) {
1859 if (VT.getVectorElementType() != MVT::i32 ||
1860 !useRVVForFixedLengthVectorVT(VT))
1861 continue;
1862 ElementCount EC = VT.getVectorElementCount();
1863 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1864 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1865 }
1866 }
1867 }
1868
1869 // Customize load and store operation for bf16 if zfh isn't enabled.
1870 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1871 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1873 }
1874
1875 // Function alignments.
1876 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1877 setMinFunctionAlignment(FunctionAlignment);
1878 // Set preferred alignments.
1879 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1880 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1881
1887
1888 if (Subtarget.hasStdExtFOrZfinx())
1890
1891 if (Subtarget.hasStdExtZbb())
1893
1894 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1895 Subtarget.hasVInstructions())
1897
1898 if (Subtarget.hasStdExtZbkb())
1900
1901 if (Subtarget.hasStdExtFOrZfinx())
1904 if (Subtarget.hasVInstructions())
1907 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1910 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1915
1916 if (Subtarget.hasVendorXTHeadMemPair())
1918 if (Subtarget.useRVVForFixedLengthVectors())
1920
1921 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1922
1923 // Disable strict node mutation.
1924 IsStrictFPEnabled = true;
1925 EnableExtLdPromotion = true;
1926
1927 // Let the subtarget decide if a predictable select is more expensive than the
1928 // corresponding branch. This information is used in CGP/SelectOpt to decide
1929 // when to convert selects into branches.
1930 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1931
1932 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1933 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1934
1935 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1936 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1937 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1938
1940 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1941 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1942
1943 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1944 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1945}
1946
1949 if (Subtarget.is64Bit() && Subtarget.hasStdExtP())
1950 if (VT == MVT::v2i16 || VT == MVT::v4i8)
1951 return TypeWidenVector;
1952
1954}
1955
1957 LLVMContext &Context,
1958 EVT VT) const {
1959 if (!VT.isVector())
1960 return getPointerTy(DL);
1961 if (Subtarget.hasVInstructions() &&
1962 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1963 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1965}
1966
1968 return Subtarget.getXLenVT();
1969}
1970
1971// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1972bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1973 unsigned VF,
1974 bool IsScalable) const {
1975 if (!Subtarget.hasVInstructions())
1976 return true;
1977
1978 if (!IsScalable)
1979 return true;
1980
1981 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1982 return true;
1983
1984 // Don't allow VF=1 if those types are't legal.
1985 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1986 return true;
1987
1988 // VLEN=32 support is incomplete.
1989 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1990 return true;
1991
1992 // The maximum VF is for the smallest element width with LMUL=8.
1993 // VF must be a power of 2.
1994 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1995 return VF > MaxVF || !isPowerOf2_32(VF);
1996}
1997
1999 return !Subtarget.hasVInstructions() ||
2000 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
2001}
2002
2005 MachineFunction &MF, unsigned Intrinsic) const {
2006 IntrinsicInfo Info;
2007 auto &DL = I.getDataLayout();
2008
2009 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
2010 bool IsUnitStrided, bool UsePtrVal = false) {
2011 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
2012 // We can't use ptrVal if the intrinsic can access memory before the
2013 // pointer. This means we can't use it for strided or indexed intrinsics.
2014 if (UsePtrVal)
2015 Info.ptrVal = I.getArgOperand(PtrOp);
2016 else
2017 Info.fallbackAddressSpace =
2018 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
2019 Type *MemTy;
2020 if (IsStore) {
2021 // Store value is the first operand.
2022 MemTy = I.getArgOperand(0)->getType();
2023 } else {
2024 // Use return type. If it's segment load, return type is a struct.
2025 MemTy = I.getType();
2026 if (MemTy->isStructTy())
2027 MemTy = MemTy->getStructElementType(0);
2028 }
2029 if (!IsUnitStrided)
2030 MemTy = MemTy->getScalarType();
2031
2032 Info.memVT = getValueType(DL, MemTy);
2033 if (MemTy->isTargetExtTy()) {
2034 // RISC-V vector tuple type's alignment type should be its element type.
2035 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
2036 MemTy = Type::getIntNTy(
2037 MemTy->getContext(),
2038 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
2039 ->getZExtValue());
2040 Info.align = DL.getABITypeAlign(MemTy);
2041 } else {
2042 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
2043 }
2044 Info.size = MemoryLocation::UnknownSize;
2045 Info.flags |=
2047 Infos.push_back(Info);
2048 };
2049
2050 if (I.hasMetadata(LLVMContext::MD_nontemporal))
2052
2054 switch (Intrinsic) {
2055 default:
2056 return;
2057 case Intrinsic::riscv_masked_atomicrmw_xchg:
2058 case Intrinsic::riscv_masked_atomicrmw_add:
2059 case Intrinsic::riscv_masked_atomicrmw_sub:
2060 case Intrinsic::riscv_masked_atomicrmw_nand:
2061 case Intrinsic::riscv_masked_atomicrmw_max:
2062 case Intrinsic::riscv_masked_atomicrmw_min:
2063 case Intrinsic::riscv_masked_atomicrmw_umax:
2064 case Intrinsic::riscv_masked_atomicrmw_umin:
2065 case Intrinsic::riscv_masked_cmpxchg:
2066 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
2067 // narrow atomic operation. These will be expanded to an LR/SC loop that
2068 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
2069 // will be used to modify the appropriate part of the 4 byte data and
2070 // preserve the rest.
2071 Info.opc = ISD::INTRINSIC_W_CHAIN;
2072 Info.memVT = MVT::i32;
2073 Info.ptrVal = I.getArgOperand(0);
2074 Info.offset = 0;
2075 Info.align = Align(4);
2078 Infos.push_back(Info);
2079 return;
2080 case Intrinsic::riscv_seg2_load_mask:
2081 case Intrinsic::riscv_seg3_load_mask:
2082 case Intrinsic::riscv_seg4_load_mask:
2083 case Intrinsic::riscv_seg5_load_mask:
2084 case Intrinsic::riscv_seg6_load_mask:
2085 case Intrinsic::riscv_seg7_load_mask:
2086 case Intrinsic::riscv_seg8_load_mask:
2087 case Intrinsic::riscv_sseg2_load_mask:
2088 case Intrinsic::riscv_sseg3_load_mask:
2089 case Intrinsic::riscv_sseg4_load_mask:
2090 case Intrinsic::riscv_sseg5_load_mask:
2091 case Intrinsic::riscv_sseg6_load_mask:
2092 case Intrinsic::riscv_sseg7_load_mask:
2093 case Intrinsic::riscv_sseg8_load_mask:
2094 SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
2095 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2096 return;
2097 case Intrinsic::riscv_seg2_store_mask:
2098 case Intrinsic::riscv_seg3_store_mask:
2099 case Intrinsic::riscv_seg4_store_mask:
2100 case Intrinsic::riscv_seg5_store_mask:
2101 case Intrinsic::riscv_seg6_store_mask:
2102 case Intrinsic::riscv_seg7_store_mask:
2103 case Intrinsic::riscv_seg8_store_mask:
2104 // Operands are (vec, ..., vec, ptr, mask, vl)
2105 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2106 /*IsStore*/ true,
2107 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2108 return;
2109 case Intrinsic::riscv_sseg2_store_mask:
2110 case Intrinsic::riscv_sseg3_store_mask:
2111 case Intrinsic::riscv_sseg4_store_mask:
2112 case Intrinsic::riscv_sseg5_store_mask:
2113 case Intrinsic::riscv_sseg6_store_mask:
2114 case Intrinsic::riscv_sseg7_store_mask:
2115 case Intrinsic::riscv_sseg8_store_mask:
2116 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
2117 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2118 /*IsStore*/ true,
2119 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2120 return;
2121 case Intrinsic::riscv_vlm:
2122 SetRVVLoadStoreInfo(/*PtrOp*/ 0,
2123 /*IsStore*/ false,
2124 /*IsUnitStrided*/ true,
2125 /*UsePtrVal*/ true);
2126 return;
2127 case Intrinsic::riscv_vle:
2128 case Intrinsic::riscv_vle_mask:
2129 case Intrinsic::riscv_vleff:
2130 case Intrinsic::riscv_vleff_mask:
2131 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2132 /*IsStore*/ false,
2133 /*IsUnitStrided*/ true,
2134 /*UsePtrVal*/ true);
2135 return;
2136 case Intrinsic::riscv_vsm:
2137 case Intrinsic::riscv_vse:
2138 case Intrinsic::riscv_vse_mask:
2139 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2140 /*IsStore*/ true,
2141 /*IsUnitStrided*/ true,
2142 /*UsePtrVal*/ true);
2143 return;
2144 case Intrinsic::riscv_vlse:
2145 case Intrinsic::riscv_vlse_mask:
2146 case Intrinsic::riscv_vloxei:
2147 case Intrinsic::riscv_vloxei_mask:
2148 case Intrinsic::riscv_vluxei:
2149 case Intrinsic::riscv_vluxei_mask:
2150 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2151 /*IsStore*/ false,
2152 /*IsUnitStrided*/ false);
2153 return;
2154 case Intrinsic::riscv_vsse:
2155 case Intrinsic::riscv_vsse_mask:
2156 case Intrinsic::riscv_vsoxei:
2157 case Intrinsic::riscv_vsoxei_mask:
2158 case Intrinsic::riscv_vsuxei:
2159 case Intrinsic::riscv_vsuxei_mask:
2160 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2161 /*IsStore*/ true,
2162 /*IsUnitStrided*/ false);
2163 return;
2164 case Intrinsic::riscv_vlseg2:
2165 case Intrinsic::riscv_vlseg3:
2166 case Intrinsic::riscv_vlseg4:
2167 case Intrinsic::riscv_vlseg5:
2168 case Intrinsic::riscv_vlseg6:
2169 case Intrinsic::riscv_vlseg7:
2170 case Intrinsic::riscv_vlseg8:
2171 case Intrinsic::riscv_vlseg2ff:
2172 case Intrinsic::riscv_vlseg3ff:
2173 case Intrinsic::riscv_vlseg4ff:
2174 case Intrinsic::riscv_vlseg5ff:
2175 case Intrinsic::riscv_vlseg6ff:
2176 case Intrinsic::riscv_vlseg7ff:
2177 case Intrinsic::riscv_vlseg8ff:
2178 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2179 /*IsStore*/ false,
2180 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2181 return;
2182 case Intrinsic::riscv_vlseg2_mask:
2183 case Intrinsic::riscv_vlseg3_mask:
2184 case Intrinsic::riscv_vlseg4_mask:
2185 case Intrinsic::riscv_vlseg5_mask:
2186 case Intrinsic::riscv_vlseg6_mask:
2187 case Intrinsic::riscv_vlseg7_mask:
2188 case Intrinsic::riscv_vlseg8_mask:
2189 case Intrinsic::riscv_vlseg2ff_mask:
2190 case Intrinsic::riscv_vlseg3ff_mask:
2191 case Intrinsic::riscv_vlseg4ff_mask:
2192 case Intrinsic::riscv_vlseg5ff_mask:
2193 case Intrinsic::riscv_vlseg6ff_mask:
2194 case Intrinsic::riscv_vlseg7ff_mask:
2195 case Intrinsic::riscv_vlseg8ff_mask:
2196 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2197 /*IsStore*/ false,
2198 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2199 return;
2200 case Intrinsic::riscv_vlsseg2:
2201 case Intrinsic::riscv_vlsseg3:
2202 case Intrinsic::riscv_vlsseg4:
2203 case Intrinsic::riscv_vlsseg5:
2204 case Intrinsic::riscv_vlsseg6:
2205 case Intrinsic::riscv_vlsseg7:
2206 case Intrinsic::riscv_vlsseg8:
2207 case Intrinsic::riscv_vloxseg2:
2208 case Intrinsic::riscv_vloxseg3:
2209 case Intrinsic::riscv_vloxseg4:
2210 case Intrinsic::riscv_vloxseg5:
2211 case Intrinsic::riscv_vloxseg6:
2212 case Intrinsic::riscv_vloxseg7:
2213 case Intrinsic::riscv_vloxseg8:
2214 case Intrinsic::riscv_vluxseg2:
2215 case Intrinsic::riscv_vluxseg3:
2216 case Intrinsic::riscv_vluxseg4:
2217 case Intrinsic::riscv_vluxseg5:
2218 case Intrinsic::riscv_vluxseg6:
2219 case Intrinsic::riscv_vluxseg7:
2220 case Intrinsic::riscv_vluxseg8:
2221 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2222 /*IsStore*/ false,
2223 /*IsUnitStrided*/ false);
2224 return;
2225 case Intrinsic::riscv_vlsseg2_mask:
2226 case Intrinsic::riscv_vlsseg3_mask:
2227 case Intrinsic::riscv_vlsseg4_mask:
2228 case Intrinsic::riscv_vlsseg5_mask:
2229 case Intrinsic::riscv_vlsseg6_mask:
2230 case Intrinsic::riscv_vlsseg7_mask:
2231 case Intrinsic::riscv_vlsseg8_mask:
2232 case Intrinsic::riscv_vloxseg2_mask:
2233 case Intrinsic::riscv_vloxseg3_mask:
2234 case Intrinsic::riscv_vloxseg4_mask:
2235 case Intrinsic::riscv_vloxseg5_mask:
2236 case Intrinsic::riscv_vloxseg6_mask:
2237 case Intrinsic::riscv_vloxseg7_mask:
2238 case Intrinsic::riscv_vloxseg8_mask:
2239 case Intrinsic::riscv_vluxseg2_mask:
2240 case Intrinsic::riscv_vluxseg3_mask:
2241 case Intrinsic::riscv_vluxseg4_mask:
2242 case Intrinsic::riscv_vluxseg5_mask:
2243 case Intrinsic::riscv_vluxseg6_mask:
2244 case Intrinsic::riscv_vluxseg7_mask:
2245 case Intrinsic::riscv_vluxseg8_mask:
2246 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
2247 /*IsStore*/ false,
2248 /*IsUnitStrided*/ false);
2249 return;
2250 case Intrinsic::riscv_vsseg2:
2251 case Intrinsic::riscv_vsseg3:
2252 case Intrinsic::riscv_vsseg4:
2253 case Intrinsic::riscv_vsseg5:
2254 case Intrinsic::riscv_vsseg6:
2255 case Intrinsic::riscv_vsseg7:
2256 case Intrinsic::riscv_vsseg8:
2257 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2258 /*IsStore*/ true,
2259 /*IsUnitStrided*/ false);
2260 return;
2261 case Intrinsic::riscv_vsseg2_mask:
2262 case Intrinsic::riscv_vsseg3_mask:
2263 case Intrinsic::riscv_vsseg4_mask:
2264 case Intrinsic::riscv_vsseg5_mask:
2265 case Intrinsic::riscv_vsseg6_mask:
2266 case Intrinsic::riscv_vsseg7_mask:
2267 case Intrinsic::riscv_vsseg8_mask:
2268 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2269 /*IsStore*/ true,
2270 /*IsUnitStrided*/ false);
2271 return;
2272 case Intrinsic::riscv_vssseg2:
2273 case Intrinsic::riscv_vssseg3:
2274 case Intrinsic::riscv_vssseg4:
2275 case Intrinsic::riscv_vssseg5:
2276 case Intrinsic::riscv_vssseg6:
2277 case Intrinsic::riscv_vssseg7:
2278 case Intrinsic::riscv_vssseg8:
2279 case Intrinsic::riscv_vsoxseg2:
2280 case Intrinsic::riscv_vsoxseg3:
2281 case Intrinsic::riscv_vsoxseg4:
2282 case Intrinsic::riscv_vsoxseg5:
2283 case Intrinsic::riscv_vsoxseg6:
2284 case Intrinsic::riscv_vsoxseg7:
2285 case Intrinsic::riscv_vsoxseg8:
2286 case Intrinsic::riscv_vsuxseg2:
2287 case Intrinsic::riscv_vsuxseg3:
2288 case Intrinsic::riscv_vsuxseg4:
2289 case Intrinsic::riscv_vsuxseg5:
2290 case Intrinsic::riscv_vsuxseg6:
2291 case Intrinsic::riscv_vsuxseg7:
2292 case Intrinsic::riscv_vsuxseg8:
2293 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2294 /*IsStore*/ true,
2295 /*IsUnitStrided*/ false);
2296 return;
2297 case Intrinsic::riscv_vssseg2_mask:
2298 case Intrinsic::riscv_vssseg3_mask:
2299 case Intrinsic::riscv_vssseg4_mask:
2300 case Intrinsic::riscv_vssseg5_mask:
2301 case Intrinsic::riscv_vssseg6_mask:
2302 case Intrinsic::riscv_vssseg7_mask:
2303 case Intrinsic::riscv_vssseg8_mask:
2304 case Intrinsic::riscv_vsoxseg2_mask:
2305 case Intrinsic::riscv_vsoxseg3_mask:
2306 case Intrinsic::riscv_vsoxseg4_mask:
2307 case Intrinsic::riscv_vsoxseg5_mask:
2308 case Intrinsic::riscv_vsoxseg6_mask:
2309 case Intrinsic::riscv_vsoxseg7_mask:
2310 case Intrinsic::riscv_vsoxseg8_mask:
2311 case Intrinsic::riscv_vsuxseg2_mask:
2312 case Intrinsic::riscv_vsuxseg3_mask:
2313 case Intrinsic::riscv_vsuxseg4_mask:
2314 case Intrinsic::riscv_vsuxseg5_mask:
2315 case Intrinsic::riscv_vsuxseg6_mask:
2316 case Intrinsic::riscv_vsuxseg7_mask:
2317 case Intrinsic::riscv_vsuxseg8_mask:
2318 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2319 /*IsStore*/ true,
2320 /*IsUnitStrided*/ false);
2321 return;
2322 case Intrinsic::riscv_sf_vlte8:
2323 case Intrinsic::riscv_sf_vlte16:
2324 case Intrinsic::riscv_sf_vlte32:
2325 case Intrinsic::riscv_sf_vlte64:
2326 Info.opc = ISD::INTRINSIC_VOID;
2327 Info.ptrVal = I.getArgOperand(1);
2328 switch (Intrinsic) {
2329 case Intrinsic::riscv_sf_vlte8:
2330 Info.memVT = MVT::i8;
2331 Info.align = Align(1);
2332 break;
2333 case Intrinsic::riscv_sf_vlte16:
2334 Info.memVT = MVT::i16;
2335 Info.align = Align(2);
2336 break;
2337 case Intrinsic::riscv_sf_vlte32:
2338 Info.memVT = MVT::i32;
2339 Info.align = Align(4);
2340 break;
2341 case Intrinsic::riscv_sf_vlte64:
2342 Info.memVT = MVT::i64;
2343 Info.align = Align(8);
2344 break;
2345 }
2346 Info.size = MemoryLocation::UnknownSize;
2347 Info.flags |= MachineMemOperand::MOLoad;
2348 Infos.push_back(Info);
2349 return;
2350 case Intrinsic::riscv_sf_vste8:
2351 case Intrinsic::riscv_sf_vste16:
2352 case Intrinsic::riscv_sf_vste32:
2353 case Intrinsic::riscv_sf_vste64:
2354 Info.opc = ISD::INTRINSIC_VOID;
2355 Info.ptrVal = I.getArgOperand(1);
2356 switch (Intrinsic) {
2357 case Intrinsic::riscv_sf_vste8:
2358 Info.memVT = MVT::i8;
2359 Info.align = Align(1);
2360 break;
2361 case Intrinsic::riscv_sf_vste16:
2362 Info.memVT = MVT::i16;
2363 Info.align = Align(2);
2364 break;
2365 case Intrinsic::riscv_sf_vste32:
2366 Info.memVT = MVT::i32;
2367 Info.align = Align(4);
2368 break;
2369 case Intrinsic::riscv_sf_vste64:
2370 Info.memVT = MVT::i64;
2371 Info.align = Align(8);
2372 break;
2373 }
2374 Info.size = MemoryLocation::UnknownSize;
2375 Info.flags |= MachineMemOperand::MOStore;
2376 Infos.push_back(Info);
2377 return;
2378 }
2379}
2380
2382 const AddrMode &AM, Type *Ty,
2383 unsigned AS,
2384 Instruction *I) const {
2385 // No global is ever allowed as a base.
2386 if (AM.BaseGV)
2387 return false;
2388
2389 // None of our addressing modes allows a scalable offset
2390 if (AM.ScalableOffset)
2391 return false;
2392
2393 // RVV instructions only support register addressing.
2394 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2395 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2396
2397 // Require a 12-bit signed offset.
2398 if (!isInt<12>(AM.BaseOffs))
2399 return false;
2400
2401 switch (AM.Scale) {
2402 case 0: // "r+i" or just "i", depending on HasBaseReg.
2403 break;
2404 case 1:
2405 if (!AM.HasBaseReg) // allow "r+i".
2406 break;
2407 return false; // disallow "r+r" or "r+r+i".
2408 default:
2409 return false;
2410 }
2411
2412 return true;
2413}
2414
2416 return isInt<12>(Imm);
2417}
2418
2420 return isInt<12>(Imm);
2421}
2422
2423// On RV32, 64-bit integers are split into their high and low parts and held
2424// in two different registers, so the trunc is free since the low register can
2425// just be used.
2426// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2427// isTruncateFree?
2429 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2430 return false;
2431 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2432 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2433 return (SrcBits == 64 && DestBits == 32);
2434}
2435
2437 // We consider i64->i32 free on RV64 since we have good selection of W
2438 // instructions that make promoting operations back to i64 free in many cases.
2439 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2440 !DstVT.isInteger())
2441 return false;
2442 unsigned SrcBits = SrcVT.getSizeInBits();
2443 unsigned DestBits = DstVT.getSizeInBits();
2444 return (SrcBits == 64 && DestBits == 32);
2445}
2446
2448 EVT SrcVT = Val.getValueType();
2449 // free truncate from vnsrl and vnsra
2450 if (Subtarget.hasVInstructions() &&
2451 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2452 SrcVT.isVector() && VT2.isVector()) {
2453 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2454 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2455 if (SrcBits == DestBits * 2) {
2456 return true;
2457 }
2458 }
2459 return TargetLowering::isTruncateFree(Val, VT2);
2460}
2461
2463 // Zexts are free if they can be combined with a load.
2464 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2465 // poorly with type legalization of compares preferring sext.
2466 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2467 EVT MemVT = LD->getMemoryVT();
2468 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2469 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2470 LD->getExtensionType() == ISD::ZEXTLOAD))
2471 return true;
2472 }
2473
2474 return TargetLowering::isZExtFree(Val, VT2);
2475}
2476
2478 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2479}
2480
2482 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2483}
2484
2486 return Subtarget.hasCTZLike();
2487}
2488
2490 return Subtarget.hasCLZLike();
2491}
2492
// Sink an AND-with-constant mask next to its compare-with-0 user when the
// mask can be matched as a single BEXTI-like bit extract.
2494 const Instruction &AndI) const {
2495 // We expect to be able to match a bit extraction instruction if the Zbs
2496 // extension is supported and the mask is a power of two. However, we
2497 // conservatively return false if the mask would fit in an ANDI instruction,
2498 // on the basis that it's possible the sinking+duplication of the AND in
2499 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2500 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2501 if (!Subtarget.hasBEXTILike())
2502 return false;
// NOTE(review): orig. line 2503 (the dyn_cast initializing `Mask` from the
// AND's constant operand) is missing from this extraction.
2504 if (!Mask)
2505 return false;
// Power-of-two mask that does NOT fit a 12-bit ANDI immediate => BEXTI wins.
2506 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2507}
2508
// hasAndNotCompare: scalar ANDN is available with Zbb/Zbkb, but only useful
// when Y is not a (foldable) constant.
2510 EVT VT = Y.getValueType();
2511
2512 if (VT.isVector())
2513 return false;
2514
2515 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2516 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2517}
2518
// hasAndNot: scalars defer to hasAndNotCompare; vectors need Zvkb (vandn).
2520 EVT VT = Y.getValueType();
2521
2522 if (!VT.isVector())
2523 return hasAndNotCompare(Y);
2524
2525 return Subtarget.hasStdExtZvkb();
2526}
2527
// hasBitTest: report whether a single-bit test (X & (1 << Y)) != 0 can be
// selected cheaply for the given operands.
2529 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2530 if (Subtarget.hasStdExtZbs())
2531 return X.getValueType().isScalarInteger();
2532 auto *C = dyn_cast<ConstantSDNode>(Y);
2533 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2534 if (Subtarget.hasVendorXTHeadBs())
2535 return C != nullptr;
2536 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2537 return C && C->getAPIntValue().ule(10);
2538}
2539
// Fold select-with-identity-constant into a masked op, but only for RVV
// VSELECTs on legal (or scalable) vector types.
2541 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2542 SDValue Y) const {
2543 if (SelectOpcode != ISD::VSELECT)
2544 return false;
2545
2546 // Only enable for rvv.
2547 if (!VT.isVector() || !Subtarget.hasVInstructions())
2548 return false;
2549
2550 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2551 return false;
2552
2553 return true;
2554}
2555
// Decide whether a load of a constant should be replaced by materializing the
// integer immediate directly.
2557 Type *Ty) const {
2558 assert(Ty->isIntegerTy());
2559
2560 unsigned BitSize = Ty->getIntegerBitWidth();
2561 if (BitSize > Subtarget.getXLen())
2562 return false;
2563
2564 // Fast path, assume 32-bit immediates are cheap.
2565 int64_t Val = Imm.getSExtValue();
2566 if (isInt<32>(Val))
2567 return true;
2568
2569 // A constant pool entry may be more aligned than the load we're trying to
2570 // replace. If we don't support unaligned scalar mem, prefer the constant
2571 // pool.
2572 // TODO: Can the caller pass down the alignment?
2573 if (!Subtarget.enableUnalignedScalarMem())
2574 return true;
2575
2576 // Prefer to keep the load if it would require many instructions.
2577 // This uses the same threshold we use for constant pools but doesn't
2578 // check useConstantPoolForLargeInts.
2579 // TODO: Should we keep the load only when we're definitely going to emit a
2580 // constant pool?
2581
// NOTE(review): orig. line 2582 (building `Seq` via RISCVMatInt::generateInstSeq)
// is missing from this extraction.
2583 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2584}
2585
// TargetLowering hook controlling the (X >> Y) & C <-> (X & (C << Y)) >> Y
// rewrite; kept/forced so the 'bit extract' ((1 >> Y) & 1) pattern survives.
// NOTE(review): leading signature lines (orig. 2586-2588) are missing from
// this extraction.
2589 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2590 SelectionDAG &DAG) const {
2591 // One interesting pattern that we'd want to form is 'bit extract':
2592 // ((1 >> Y) & 1) ==/!= 0
2593 // But we also need to be careful not to try to reverse that fold.
2594
2595 // Is this '((1 >> Y) & 1)'?
2596 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2597 return false; // Keep the 'bit extract' pattern.
2598
2599 // Will this be '((1 >> Y) & 1)' after the transform?
2600 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2601 return true; // Do form the 'bit extract' pattern.
2602
2603 // If 'X' is a constant, and we transform, then we will immediately
2604 // try to undo the fold, thus causing endless combine loop.
2605 // So only do the transform if X is not a constant. This matches the default
2606 // implementation of this function.
2607 return !XC;
2608}
2609
// shouldScalarizeBinop: allow scalarizing a vector binop when the vector form
// is unsupported or the scalar form is at least custom-lowered.
2611 unsigned Opc = VecOp.getOpcode();
2612
2613 // Assume target opcodes can't be scalarized.
2614 // TODO - do we have any exceptions?
2615 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2616 return false;
2617
2618 // If the vector op is not supported, try to convert to scalar.
2619 EVT VecVT = VecOp.getValueType();
// NOTE(review): orig. line 2620 (the legality check guarding this `return
// true`, presumably !isOperationLegalOrCustom(Opc, VecVT)) is missing from
// this extraction.
2621 return true;
2622
2623 // If the vector op is supported, but the scalar op is not, the transform may
2624 // not be worthwhile.
2625 // Permit a vector binary operation can be converted to scalar binary
2626 // operation which is custom lowered with illegal type.
2627 EVT ScalarVT = VecVT.getScalarType();
2628 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2629 isOperationCustom(Opc, ScalarVT);
2630}
2631
// isOffsetFoldingLegal: never fold offsets into global address nodes.
2633 const GlobalAddressSDNode *GA) const {
2634 // In order to maximise the opportunity for common subexpression elimination,
2635 // keep a separate ADD node for the global address offset instead of folding
2636 // it in the global address node. Later peephole optimisations may choose to
2637 // fold it back in when profitable.
2638 return false;
2639}
2640
2641// Returns 0-31 if the fli instruction is available for the type and this is
2642// legal FP immediate for the type. Returns -1 otherwise.
2644 if (!Subtarget.hasStdExtZfa())
2645 return -1;
2646
2647 bool IsSupportedVT = false;
2648 if (VT == MVT::f16) {
// f16 FLI needs Zfh or Zvfh on top of Zfa.
2649 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2650 } else if (VT == MVT::f32) {
2651 IsSupportedVT = true;
2652 } else if (VT == MVT::f64) {
2653 assert(Subtarget.hasStdExtD() && "Expect D extension");
2654 IsSupportedVT = true;
2655 }
2656
2657 if (!IsSupportedVT)
2658 return -1;
2659
// Index into the Zfa FLI immediate table, or -1 if Imm is not in the table.
2660 return RISCVLoadFPImm::getLoadFPImm(Imm);
2661}
2662
// isFPImmLegal: an FP immediate is "legal" if it can be produced cheaply —
// via fli (Zfa), fli+fneg, fmv of +/-0.0, or a short integer-materialization
// sequence followed by an fmv.
2664 bool ForCodeSize) const {
2665 bool IsLegalVT = false;
2666 if (VT == MVT::f16)
2667 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2668 else if (VT == MVT::f32)
2669 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2670 else if (VT == MVT::f64)
2671 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2672 else if (VT == MVT::bf16)
2673 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2674
2675 if (!IsLegalVT)
2676 return false;
2677
2678 if (getLegalZfaFPImm(Imm, VT) >= 0)
2679 return true;
2680
2681 // Some constants can be produced by fli+fneg.
2682 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2683 return true;
2684
2685 // Cannot create a 64 bit floating-point immediate value for rv32.
2686 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2687 // td can handle +0.0 or -0.0 already.
2688 // -0.0 can be created by fmv + fneg.
2689 return Imm.isZero();
2690 }
2691
2692 // Special case: fmv + fneg
2693 if (Imm.isNegZero())
2694 return true;
2695
2696 // Building an integer and then converting requires a fmv at the end of
2697 // the integer sequence. The fmv is not required for Zfinx.
2698 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2699 const int Cost =
2700 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2701 Subtarget.getXLen(), Subtarget);
// FPImmCost is the cl::opt threshold (default 3) declared at file scope.
2702 return Cost <= FPImmCost;
2703}
2704
2705// TODO: This is very conservative.
2707 unsigned Index) const {
// NOTE(review): orig. line 2708 (an early legality check guarding this
// `return false`) is missing from this extraction.
2709 return false;
2710
2711 // Extracts from index 0 are just subreg extracts.
2712 if (Index == 0)
2713 return true;
2714
2715 // Only support extracting a fixed from a fixed vector for now.
2716 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2717 return false;
2718
2719 EVT EltVT = ResVT.getVectorElementType();
2720 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2721
2722 // The smallest type we can slide is i8.
2723 if (EltVT == MVT::i1)
2724 return false;
2725
2726 unsigned ResElts = ResVT.getVectorNumElements();
2727 unsigned SrcElts = SrcVT.getVectorNumElements();
2728
2729 unsigned MinVLen = Subtarget.getRealMinVLen();
2730 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2731
2732 // If we're extracting only data from the first VLEN bits of the source
2733 // then we can always do this with an m1 vslidedown.vx. Restricting the
2734 // Index ensures we can use a vslidedown.vi.
2735 // TODO: We can generalize this when the exact VLEN is known.
2736 if (Index + ResElts <= MinVLMAX && Index < 31)
2737 return true;
2738
2739 // Convervatively only handle extracting half of a vector.
2740 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2741 // the upper half of a vector until we have more test coverage.
2742 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2743 // a cheap extract. However, this case is important in practice for
2744 // shuffled extracts of longer vectors. How resolve?
2745 return (ResElts * 2) == SrcElts && Index == ResElts;
2746}
2747
// getRegisterTypeForCallingConv: pass f16 as f32 when F/Zfinx exists but the
// half-precision min extensions do not.
2749 CallingConv::ID CC,
2750 EVT VT) const {
2751 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2752 // We might still end up using a GPR but that will be decided based on ABI.
2753 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2754 !Subtarget.hasStdExtZfhminOrZhinxmin())
2755 return MVT::f32;
2756
2757 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2758}
2759
2760unsigned
// NOTE(review): the parameter-list opening line (orig. 2761) is missing from
// this extraction.
2762 std::optional<MVT> RegisterVT) const {
2763 // Pair inline assembly operand
// A GPR-pair (2*XLEN) inline-asm operand with an Untyped register class
// counts as a single register.
2764 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2765 *RegisterVT == MVT::Untyped)
2766 return 1;
2767
2768 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2769}
2770
// getNumRegistersForCallingConv: mirrors getRegisterTypeForCallingConv —
// f16 promoted to f32 still occupies one register.
2772 CallingConv::ID CC,
2773 EVT VT) const {
2774 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2775 // We might still end up using a GPR but that will be decided based on ABI.
2776 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2777 !Subtarget.hasStdExtZfhminOrZhinxmin())
2778 return 1;
2779
2780 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2781}
2782
2783// Changes the condition code and swaps operands if necessary, so the SetCC
2784// operation matches one of the comparisons supported directly by branches
2785// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2786// with 1/-1.
// NOTE(review): the function's first signature line (orig. 2787) is missing
// from this extraction.
2788 ISD::CondCode &CC, SelectionDAG &DAG,
2789 const RISCVSubtarget &Subtarget) {
2790 // If this is a single bit test that can't be handled by ANDI, shift the
2791 // bit to be tested to the MSB and perform a signed compare with 0.
2792 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2793 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2794 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2795 // XAndesPerf supports branch on test bit.
2796 !Subtarget.hasVendorXAndesPerf()) {
2797 uint64_t Mask = LHS.getConstantOperandVal(1);
2798 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2799 unsigned ShAmt = 0;
2800 if (isPowerOf2_64(Mask)) {
// Single-bit mask: move the bit to the sign position and test the sign.
2801 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2802 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2803 } else {
// Contiguous low-bit mask: shift out the bits above it, compare with 0.
2804 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2805 }
2806
2807 LHS = LHS.getOperand(0);
2808 if (ShAmt != 0)
2809 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2810 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2811 return;
2812 }
2813 }
2814
2815 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2816 int64_t C = RHSC->getSExtValue();
2817 switch (CC) {
2818 default: break;
2819 case ISD::SETGT:
2820 // Convert X > -1 to X >= 0.
2821 if (C == -1) {
2822 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2823 CC = ISD::SETGE;
2824 return;
2825 }
2826 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2827 C != INT64_MAX && isInt<5>(C + 1)) {
2828 // We have a conditional move instruction for SETGE but not SETGT.
2829 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2830 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2831 CC = ISD::SETGE;
2832 return;
2833 }
2834 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2835 // We have a branch immediate instruction for SETGE but not SETGT.
2836 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2837 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2838 CC = ISD::SETGE;
2839 return;
2840 }
2841 break;
2842 case ISD::SETLT:
2843 // Convert X < 1 to 0 >= X.
2844 if (C == 1) {
2845 RHS = LHS;
2846 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2847 CC = ISD::SETGE;
2848 return;
2849 }
2850 break;
2851 case ISD::SETUGT:
2852 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2853 C != INT64_MAX && isUInt<5>(C + 1)) {
2854 // We have a conditional move instruction for SETUGE but not SETUGT.
2855 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2856 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2857 CC = ISD::SETUGE;
2858 return;
2859 }
2860 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2861 // We have a branch immediate instruction for SETUGE but not SETUGT.
2862 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2863 // immediate.
2864 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2865 CC = ISD::SETUGE;
2866 return;
2867 }
2868 break;
2869 }
2870 }
2871
// Remaining unsupported predicates are handled by swapping the operands.
2872 switch (CC) {
2873 default:
2874 break;
2875 case ISD::SETGT:
2876 case ISD::SETLE:
2877 case ISD::SETUGT:
2878 case ISD::SETULE:
// NOTE(review): orig. line 2879 is missing here; presumably it replaced CC
// with the swapped-operand condition code — confirm against upstream.
2880 std::swap(LHS, RHS);
2881 break;
2882 }
2883}
2884
// getLMUL: map a scalable vector or RVV tuple type to its register-group
// multiplier. NOTE(review): the signature line (orig. 2885) is missing from
// this extraction.
2886 if (VT.isRISCVVectorTuple()) {
2887 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2888 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2889 return RISCVVType::LMUL_F8;
2890 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2891 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2892 return RISCVVType::LMUL_F4;
2893 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2894 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2895 return RISCVVType::LMUL_F2;
2896 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2897 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2898 return RISCVVType::LMUL_1;
2899 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2900 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2901 return RISCVVType::LMUL_2;
2902 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2903 return RISCVVType::LMUL_4;
2904 llvm_unreachable("Invalid vector tuple type LMUL.");
2905 }
2906
2907 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2908 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
// i1 vectors are measured as if each element were a byte so the switch below
// can share the bit-size buckets.
2909 if (VT.getVectorElementType() == MVT::i1)
2910 KnownSize *= 8;
2911
2912 switch (KnownSize) {
2913 default:
2914 llvm_unreachable("Invalid LMUL.");
2915 case 8:
2916 return RISCVVType::LMUL_F8;
2917 case 16:
2918 return RISCVVType::LMUL_F4;
2919 case 32:
2920 return RISCVVType::LMUL_F2;
2921 case 64:
2922 return RISCVVType::LMUL_1;
2923 case 128:
2924 return RISCVVType::LMUL_2;
2925 case 256:
2926 return RISCVVType::LMUL_4;
2927 case 512:
2928 return RISCVVType::LMUL_8;
2929 }
2930}
2931
// getRegClassIDForLMUL: map an LMUL value to the corresponding vector
// register class ID. NOTE(review): the signature line (orig. 2932) and the
// fractional-LMUL case labels (orig. 2936-2938) are missing from this
// extraction; fractional LMULs presumably share the VR class — confirm.
2933 switch (LMul) {
2934 default:
2935 llvm_unreachable("Invalid LMUL.");
2939 case RISCVVType::LMUL_1:
2940 return RISCV::VRRegClassID;
2941 case RISCVVType::LMUL_2:
2942 return RISCV::VRM2RegClassID;
2943 case RISCVVType::LMUL_4:
2944 return RISCV::VRM4RegClassID;
2945 case RISCVVType::LMUL_8:
2946 return RISCV::VRM8RegClassID;
2947 }
2948}
2949
2950unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2951 RISCVVType::VLMUL LMUL = getLMUL(VT);
2952 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2953 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2954 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2955 "Unexpected subreg numbering");
2956 return RISCV::sub_vrm1_0 + Index;
2957 }
2958 if (LMUL == RISCVVType::LMUL_2) {
2959 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2960 "Unexpected subreg numbering");
2961 return RISCV::sub_vrm2_0 + Index;
2962 }
2963 if (LMUL == RISCVVType::LMUL_4) {
2964 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2965 "Unexpected subreg numbering");
2966 return RISCV::sub_vrm4_0 + Index;
2967 }
2968 llvm_unreachable("Invalid vector type.");
2969}
2970
// getRegClassIDForVecVT: pick the register class for a vector or RVV tuple
// type. NOTE(review): the signature line (orig. 2971) is missing from this
// extraction.
2972 if (VT.isRISCVVectorTuple()) {
2973 unsigned NF = VT.getRISCVVectorTupleNumFields();
// Registers per tuple field, clamped to at least one whole register.
2974 unsigned RegsPerField =
2975 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2976 (NF * RISCV::RVVBitsPerBlock));
2977 switch (RegsPerField) {
2978 case 1:
2979 if (NF == 2)
2980 return RISCV::VRN2M1RegClassID;
2981 if (NF == 3)
2982 return RISCV::VRN3M1RegClassID;
2983 if (NF == 4)
2984 return RISCV::VRN4M1RegClassID;
2985 if (NF == 5)
2986 return RISCV::VRN5M1RegClassID;
2987 if (NF == 6)
2988 return RISCV::VRN6M1RegClassID;
2989 if (NF == 7)
2990 return RISCV::VRN7M1RegClassID;
2991 if (NF == 8)
2992 return RISCV::VRN8M1RegClassID;
2993 break;
2994 case 2:
2995 if (NF == 2)
2996 return RISCV::VRN2M2RegClassID;
2997 if (NF == 3)
2998 return RISCV::VRN3M2RegClassID;
2999 if (NF == 4)
3000 return RISCV::VRN4M2RegClassID;
3001 break;
3002 case 4:
3003 assert(NF == 2);
3004 return RISCV::VRN2M4RegClassID;
3005 default:
3006 break;
3007 }
3008 llvm_unreachable("Invalid vector tuple type RegClass.");
3009 }
3010
// Mask vectors live in the plain VR class regardless of LMUL.
3011 if (VT.getVectorElementType() == MVT::i1)
3012 return RISCV::VRRegClassID;
3013 return getRegClassIDForLMUL(getLMUL(VT));
3014}
3015
3016// Attempt to decompose a subvector insert/extract between VecVT and
3017// SubVecVT via subregister indices. Returns the subregister index that
3018// can perform the subvector insert/extract with the given element index, as
3019// well as the index corresponding to any leftover subvectors that must be
3020// further inserted/extracted within the register class for SubVecVT.
3021std::pair<unsigned, unsigned>
// NOTE(review): the qualified function-name line (orig. 3022) is missing from
// this extraction.
3023 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
3024 const RISCVRegisterInfo *TRI) {
3025 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
3026 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
3027 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
3028 "Register classes not ordered");
3029 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
3030 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
3031
3032 // If VecVT is a vector tuple type, either it's the tuple type with same
3033 // RegClass with SubVecVT or SubVecVT is a actually a subvector of the VecVT.
3034 if (VecVT.isRISCVVectorTuple()) {
3035 if (VecRegClassID == SubRegClassID)
3036 return {RISCV::NoSubRegister, 0};
3037
3038 assert(SubVecVT.isScalableVector() &&
3039 "Only allow scalable vector subvector.");
3040 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
3041 "Invalid vector tuple insert/extract for vector and subvector with "
3042 "different LMUL.");
3043 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
3044 }
3045
3046 // Try to compose a subregister index that takes us from the incoming
3047 // LMUL>1 register class down to the outgoing one. At each step we half
3048 // the LMUL:
3049 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
3050 // Note that this is not guaranteed to find a subregister index, such as
3051 // when we are extracting from one VR type to another.
3052 unsigned SubRegIdx = RISCV::NoSubRegister;
3053 for (const unsigned RCID :
3054 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
3055 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
3056 VecVT = VecVT.getHalfNumVectorElementsVT();
// Decide which half of the (now halved) register group the index falls in.
3057 bool IsHi =
3058 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
3059 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
3060 getSubregIndexByMVT(VecVT, IsHi));
3061 if (IsHi)
3062 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
3063 }
3064 return {SubRegIdx, InsertExtractIdx};
3065}
3066
3067// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
3068// stores for those types.
3069bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
3070 return !Subtarget.useRVVForFixedLengthVectors() ||
3071 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
3072}
3073
// isLegalElementTypeForRVV: report whether a scalar type can be an RVV vector
// element on this subtarget. NOTE(review): the signature line (orig. 3074) is
// missing from this extraction.
3075 if (!ScalarTy.isSimple())
3076 return false;
3077 switch (ScalarTy.getSimpleVT().SimpleTy) {
3078 case MVT::iPTR:
// Pointers are XLEN-wide: on RV64 they need i64 vector support.
3079 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
3080 case MVT::i8:
3081 case MVT::i16:
3082 case MVT::i32:
3083 return Subtarget.hasVInstructions();
3084 case MVT::i64:
3085 return Subtarget.hasVInstructionsI64();
3086 case MVT::f16:
3087 return Subtarget.hasVInstructionsF16Minimal();
3088 case MVT::bf16:
3089 return Subtarget.hasVInstructionsBF16Minimal();
3090 case MVT::f32:
3091 return Subtarget.hasVInstructionsF32();
3092 case MVT::f64:
3093 return Subtarget.hasVInstructionsF64();
3094 default:
3095 return false;
3096 }
3097}
3098
3099
// Threshold for rewriting repeated FP divisions as multiplication by the
// reciprocal; value comes from the NumRepeatedDivisors cl::opt (default 2).
3101 return NumRepeatedDivisors;
3102}
3103
// getVLOperand: fetch the VL operand of an RVV intrinsic node, or an empty
// SDValue when the intrinsic has no VL-operand table entry.
3105 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3106 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3107 "Unexpected opcode");
3108 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3109 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
// NOTE(review): orig. line 3110 (the declaration of `II`, looked up in the
// RISCVVIntrinsicsTable) is missing from this extraction.
3111 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3112 if (!II)
3113 return SDValue();
// +1 skips the intrinsic-ID operand; +HasChain skips the chain if present.
3114 return Op.getOperand(II->VLOperand + 1 + HasChain);
3115}
3116
// File-local predicate: can this fixed-length vector type be lowered through
// RVV on the given subtarget? NOTE(review): the first signature line (orig.
// 3117) is missing from this extraction.
3118 const RISCVSubtarget &Subtarget) {
3119 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
3120 if (!Subtarget.useRVVForFixedLengthVectors())
3121 return false;
3122
3123 // We only support a set of vector types with a consistent maximum fixed size
3124 // across all supported vector element types to avoid legalization issues.
3125 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
3126 // fixed-length vector type we support is 1024 bytes.
3127 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
3128 return false;
3129
3130 unsigned MinVLen = Subtarget.getRealMinVLen();
3131
3132 MVT EltVT = VT.getVectorElementType();
3133
3134 // Don't use RVV for vectors we cannot scalarize if required.
3135 switch (EltVT.SimpleTy) {
3136 // i1 is supported but has different rules.
3137 default:
3138 return false;
3139 case MVT::i1:
3140 // Masks can only use a single register.
3141 if (VT.getVectorNumElements() > MinVLen)
3142 return false;
// Mask bits pack 8 per byte, so scale the VLEN budget used below.
3143 MinVLen /= 8;
3144 break;
3145 case MVT::i8:
3146 case MVT::i16:
3147 case MVT::i32:
3148 break;
3149 case MVT::i64:
3150 if (!Subtarget.hasVInstructionsI64())
3151 return false;
3152 break;
3153 case MVT::f16:
3154 if (!Subtarget.hasVInstructionsF16Minimal())
3155 return false;
3156 break;
3157 case MVT::bf16:
3158 if (!Subtarget.hasVInstructionsBF16Minimal())
3159 return false;
3160 break;
3161 case MVT::f32:
3162 if (!Subtarget.hasVInstructionsF32())
3163 return false;
3164 break;
3165 case MVT::f64:
3166 if (!Subtarget.hasVInstructionsF64())
3167 return false;
3168 break;
3169 }
3170
3171 // Reject elements larger than ELEN.
3172 if (EltVT.getSizeInBits() > Subtarget.getELen())
3173 return false;
3174
3175 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
3176 // Don't use RVV for types that don't fit.
3177 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
3178 return false;
3179
3180 // TODO: Perhaps an artificial restriction, but worth having whilst getting
3181 // the base fixed length RVV support in place.
3182 if (!VT.isPow2VectorType())
3183 return false;
3184
3185 return true;
3186}
3187
// Member-function shim over the file-local helper of the same name.
3188bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
3189 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
3190}
3191
3192// Return the largest legal scalable vector type that matches VT's element type.
// NOTE(review): the signature line (orig. 3193) is missing from this
// extraction.
3194 const RISCVSubtarget &Subtarget) {
3195 // This may be called before legal types are setup.
3196 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
3197 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
3198 "Expected legal fixed length vector!");
3199
3200 unsigned MinVLen = Subtarget.getRealMinVLen();
3201 unsigned MaxELen = Subtarget.getELen();
3202
3203 MVT EltVT = VT.getVectorElementType();
3204 switch (EltVT.SimpleTy) {
3205 default:
3206 llvm_unreachable("unexpected element type for RVV container");
3207 case MVT::i1:
3208 case MVT::i8:
3209 case MVT::i16:
3210 case MVT::i32:
3211 case MVT::i64:
3212 case MVT::bf16:
3213 case MVT::f16:
3214 case MVT::f32:
3215 case MVT::f64: {
3216 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
3217 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
3218 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
3219 unsigned NumElts =
// NOTE(review): orig. line 3220 (the initializer for NumElts, scaling the
// element count by RVVBitsPerBlock/MinVLen) is missing from this extraction.
3221 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
3222 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
3223 return MVT::getScalableVectorVT(EltVT, NumElts);
3224 }
3225 }
3226}
3227
// Convenience overload forwarding to the TLI-based container query.
// NOTE(review): the leading signature line (orig. 3228) and part of the
// forwarded call (orig. 3230) are missing from this extraction.
3229 const RISCVSubtarget &Subtarget) {
3231 Subtarget);
3232}
3233
// Member wrapper: delegate to the file-local helper using this lowering's
// subtarget. NOTE(review): signature line (orig. 3234) missing here.
3235 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
3236}
3237
3238// Grow V to consume an entire RVV register.
// NOTE(review): the first signature line (orig. 3239) is missing from this
// extraction.
3240 const RISCVSubtarget &Subtarget) {
3241 assert(VT.isScalableVector() &&
3242 "Expected to convert into a scalable vector!");
3243 assert(V.getValueType().isFixedLengthVector() &&
3244 "Expected a fixed length vector operand!");
3245 SDLoc DL(V);
// Insert the fixed vector at element 0 of an undef scalable container.
3246 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
3247}
3248
3249// Shrink V so it's just big enough to maintain a VT's worth of data.
// NOTE(review): the signature line (orig. 3250) and the first assert line
// (orig. 3252) are missing from this extraction.
3251 const RISCVSubtarget &Subtarget) {
3253 "Expected to convert into a fixed length vector!");
3254 assert(V.getValueType().isScalableVector() &&
3255 "Expected a scalable vector operand!");
3256 SDLoc DL(V);
// Extract the fixed-length prefix starting at element 0.
3257 return DAG.getExtractSubvector(DL, VT, V, 0);
3258}
3259
3260/// Return the type of the mask type suitable for masking the provided
3261/// vector type. This is simply an i1 element type vector of the same
3262/// (possibly scalable) length.
3263static MVT getMaskTypeFor(MVT VecVT) {
3264 assert(VecVT.isVector());
// NOTE(review): orig. line 3265 (the declaration of `EC` as VecVT's element
// count) is missing from this extraction.
3266 return MVT::getVectorVT(MVT::i1, EC);
3267}
3268
3269/// Creates an all ones mask suitable for masking a vector of type VecTy with
3270/// vector length VL. .
3271static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
3272 SelectionDAG &DAG) {
3273 MVT MaskVT = getMaskTypeFor(VecVT);
3274 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3275}
3276
// Default {mask, VL} operands for a scalable vector: an all-ones mask and
// VLMAX (represented by register X0 as the AVL).
3277static std::pair<SDValue, SDValue>
// NOTE(review): the parameter-list opening line (orig. 3278) is missing from
// this extraction.
3279 const RISCVSubtarget &Subtarget) {
3280 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
3281 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
3282 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
3283 return {Mask, VL};
3284}
3285
3286static std::pair<SDValue, SDValue>
3287getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
3288 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
3289 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3290 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
3291 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
3292 return {Mask, VL};
3293}
3294
3295// Gets the two common "VL" operands: an all-ones mask and the vector length.
3296// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
3297// the vector type that the fixed-length vector is contained in. Otherwise if
3298// VecVT is scalable, then ContainerVT should be the same as VecVT.
3299static std::pair<SDValue, SDValue>
3300getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
3301 const RISCVSubtarget &Subtarget) {
3302 if (VecVT.isFixedLengthVector())
3303 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
3304 Subtarget);
3305 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3306 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
3307}
3308
// computeVLMax: return the element count of a scalable vector as an XLenVT
// SDValue. NOTE(review): the first signature line (orig. 3309) is missing
// from this extraction.
3310 SelectionDAG &DAG) const {
3311 assert(VecVT.isScalableVector() && "Expected scalable vector");
3312 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
3313 VecVT.getVectorElementCount());
3314}
3315
// Compute the {min, max} VLMAX for VecVT from the subtarget's real VLEN
// bounds.
3316std::pair<unsigned, unsigned>
// NOTE(review): the qualified function-name line (orig. 3317) is missing from
// this extraction.
3318 const RISCVSubtarget &Subtarget) {
3319 assert(VecVT.isScalableVector() && "Expected scalable vector");
3320
3321 unsigned EltSize = VecVT.getScalarSizeInBits();
3322 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3323
3324 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3325 unsigned MaxVLMAX =
3326 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3327
3328 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3329 unsigned MinVLMAX =
3330 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3331
3332 return std::make_pair(MinVLMAX, MaxVLMAX);
3333}
3334
3335// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3336// of either is (currently) supported. This can get us into an infinite loop
3337// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3338// as a ..., etc.
3339// Until either (or both) of these can reliably lower any node, reporting that
3340// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3341// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3342// which is not desirable.
// NOTE(review): signature line (orig. 3343) missing from this extraction.
3344 EVT VT, unsigned DefinedValues) const {
3345 return false;
3346}
3347
// getLMULCost: cost of a single RVV instruction on VT, scaled by the
// LMUL-to-DLEN ratio. NOTE(review): the signature line (orig. 3348), the
// early return for non-vectors (orig. 3352), and the decodeVLMUL call (orig.
// 3359) are missing from this extraction.
3349 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
3350 // implementation-defined.
3351 if (!VT.isVector())
3353 unsigned DLenFactor = Subtarget.getDLenFactor();
3354 unsigned Cost;
3355 if (VT.isScalableVector()) {
3356 unsigned LMul;
3357 bool Fractional;
3358 std::tie(LMul, Fractional) =
3360 if (Fractional)
// A fractional LMUL narrower than DLEN still costs at least one pass.
3361 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3362 else
3363 Cost = (LMul * DLenFactor);
3364 } else {
3365 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3366 }
3367 return Cost;
3368}
3369
3370
3371/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3372/// may be quadratic in the number of vreg implied by LMUL, and is assumed to
3373/// be by default. VRGatherCostModel reflects available options. Note that
3374/// operand (index and possibly mask) are handled separately.
3376 auto LMULCost = getLMULCost(VT);
3377 bool Log2CostModel =
3378 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3379 if (Log2CostModel && LMULCost.isValid()) {
3380 unsigned Log = Log2_64(LMULCost.getValue());
3381 if (Log > 0)
3382 return LMULCost * Log;
3383 }
3384 return LMULCost * LMULCost;
3385}
3386
3387/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3388/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3389/// or may track the vrgather.vv cost. It is implementation-dependent.
3393
3394/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3395/// for the type VT. (This does not cover the vslide1up or vslide1down
3396/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3397/// or may track the vrgather.vv cost. It is implementation-dependent.
3401
3402/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3403/// for the type VT. (This does not cover the vslide1up or vslide1down
3404/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3405/// or may track the vrgather.vv cost. It is implementation-dependent.
3409
// Promote f16 (without Zfh/Zhinx) and bf16 FP operations to f32, rounding the
// result back, preserving the strict-FP chain when present. NOTE(review):
// the function's signature line (orig. 3410) is missing from this extraction,
// so its exact name is unconfirmed.
3411 const RISCVSubtarget &Subtarget) {
3412 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3413 // bf16 conversions are always promoted to f32.
3414 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3415 Op.getValueType() == MVT::bf16) {
3416 bool IsStrict = Op->isStrictFPOpcode();
3417
3418 SDLoc DL(Op);
3419 if (IsStrict) {
// Strict variant: thread the chain through the f32 op and the FP_ROUND.
3420 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3421 {Op.getOperand(0), Op.getOperand(1)});
3422 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3423 {Op.getValueType(), MVT::Other},
3424 {Val.getValue(1), Val.getValue(0),
3425 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3426 }
3427 return DAG.getNode(
3428 ISD::FP_ROUND, DL, Op.getValueType(),
3429 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3430 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3431 }
3432
3433 // Other operations are legal.
3434 return Op;
3435}
3436
3438 const RISCVSubtarget &Subtarget) {
3439 // RISC-V FP-to-int conversions saturate to the destination register size, but
3440 // don't produce 0 for nan. We can use a conversion instruction and fix the
3441 // nan case with a compare and a select.
3442 SDValue Src = Op.getOperand(0);
3443
3444 MVT DstVT = Op.getSimpleValueType();
3445 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3446
3447 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3448
3449 if (!DstVT.isVector()) {
3450 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3451 // the result.
3452 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3453 Src.getValueType() == MVT::bf16) {
3454 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3455 }
3456
3457 unsigned Opc;
3458 if (SatVT == DstVT)
3459 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3460 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3461 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3462 else
3463 return SDValue();
3464 // FIXME: Support other SatVTs by clamping before or after the conversion.
3465
3466 SDLoc DL(Op);
3467 SDValue FpToInt = DAG.getNode(
3468 Opc, DL, DstVT, Src,
3470
3471 if (Opc == RISCVISD::FCVT_WU_RV64)
3472 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3473
3474 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3475 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3477 }
3478
3479 // Vectors.
3480
3481 MVT DstEltVT = DstVT.getVectorElementType();
3482 MVT SrcVT = Src.getSimpleValueType();
3483 MVT SrcEltVT = SrcVT.getVectorElementType();
3484 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3485 unsigned DstEltSize = DstEltVT.getSizeInBits();
3486
3487 // Only handle saturating to the destination type.
3488 if (SatVT != DstEltVT)
3489 return SDValue();
3490
3491 MVT DstContainerVT = DstVT;
3492 MVT SrcContainerVT = SrcVT;
3493 if (DstVT.isFixedLengthVector()) {
3494 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3495 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3496 assert(DstContainerVT.getVectorElementCount() ==
3497 SrcContainerVT.getVectorElementCount() &&
3498 "Expected same element count");
3499 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3500 }
3501
3502 SDLoc DL(Op);
3503
3504 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3505
3506 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3507 {Src, Src, DAG.getCondCode(ISD::SETNE),
3508 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3509
3510 // Need to widen by more than 1 step, promote the FP type, then do a widening
3511 // convert.
3512 if (DstEltSize > (2 * SrcEltSize)) {
3513 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3514 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3515 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3516 }
3517
3518 MVT CvtContainerVT = DstContainerVT;
3519 MVT CvtEltVT = DstEltVT;
3520 if (SrcEltSize > (2 * DstEltSize)) {
3521 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3522 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3523 }
3524
3525 unsigned RVVOpc =
3526 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3527 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3528
3529 while (CvtContainerVT != DstContainerVT) {
3530 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3531 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3532 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3533 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3534 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3535 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3536 }
3537
3538 SDValue SplatZero = DAG.getNode(
3539 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3540 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3541 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3542 Res, DAG.getUNDEF(DstContainerVT), VL);
3543
3544 if (DstVT.isFixedLengthVector())
3545 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3546
3547 return Res;
3548}
3549
3551 const RISCVSubtarget &Subtarget) {
3552 bool IsStrict = Op->isStrictFPOpcode();
3553 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3554
3555 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3556 // bf16 conversions are always promoted to f32.
3557 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3558 SrcVal.getValueType() == MVT::bf16) {
3559 SDLoc DL(Op);
3560 if (IsStrict) {
3561 SDValue Ext =
3562 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3563 {Op.getOperand(0), SrcVal});
3564 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3565 {Ext.getValue(1), Ext.getValue(0)});
3566 }
3567 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3568 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3569 }
3570
3571 // Other operations are legal.
3572 return Op;
3573}
3574
3576 switch (Opc) {
3577 case ISD::FROUNDEVEN:
3579 case ISD::VP_FROUNDEVEN:
3580 return RISCVFPRndMode::RNE;
3581 case ISD::FTRUNC:
3582 case ISD::STRICT_FTRUNC:
3583 case ISD::VP_FROUNDTOZERO:
3584 return RISCVFPRndMode::RTZ;
3585 case ISD::FFLOOR:
3586 case ISD::STRICT_FFLOOR:
3587 case ISD::VP_FFLOOR:
3588 return RISCVFPRndMode::RDN;
3589 case ISD::FCEIL:
3590 case ISD::STRICT_FCEIL:
3591 case ISD::VP_FCEIL:
3592 return RISCVFPRndMode::RUP;
3593 case ISD::FROUND:
3594 case ISD::LROUND:
3595 case ISD::LLROUND:
3596 case ISD::STRICT_FROUND:
3597 case ISD::STRICT_LROUND:
3599 case ISD::VP_FROUND:
3600 return RISCVFPRndMode::RMM;
3601 case ISD::FRINT:
3602 case ISD::LRINT:
3603 case ISD::LLRINT:
3604 case ISD::STRICT_FRINT:
3605 case ISD::STRICT_LRINT:
3606 case ISD::STRICT_LLRINT:
3607 case ISD::VP_FRINT:
3608 case ISD::VP_LRINT:
3609 case ISD::VP_LLRINT:
3610 return RISCVFPRndMode::DYN;
3611 }
3612
3614}
3615
3616// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3617// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3618// the integer domain and back. Taking care to avoid converting values that are
3619// nan or already correct.
3620static SDValue
3622 const RISCVSubtarget &Subtarget) {
3623 MVT VT = Op.getSimpleValueType();
3624 assert(VT.isVector() && "Unexpected type");
3625
3626 SDLoc DL(Op);
3627
3628 SDValue Src = Op.getOperand(0);
3629
3630 // Freeze the source since we are increasing the number of uses.
3631 Src = DAG.getFreeze(Src);
3632
3633 MVT ContainerVT = VT;
3634 if (VT.isFixedLengthVector()) {
3635 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3636 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3637 }
3638
3639 SDValue Mask, VL;
3640 if (Op->isVPOpcode()) {
3641 Mask = Op.getOperand(1);
3642 if (VT.isFixedLengthVector())
3643 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3644 Subtarget);
3645 VL = Op.getOperand(2);
3646 } else {
3647 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3648 }
3649
3650 // We do the conversion on the absolute value and fix the sign at the end.
3651 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3652
3653 // Determine the largest integer that can be represented exactly. This and
3654 // values larger than it don't have any fractional bits so don't need to
3655 // be converted.
3656 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3657 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3658 APFloat MaxVal = APFloat(FltSem);
3659 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3660 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3661 SDValue MaxValNode =
3662 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3663 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3664 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3665
3666 // If abs(Src) was larger than MaxVal or nan, keep it.
3667 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3668 Mask =
3669 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3670 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3671 Mask, Mask, VL});
3672
3673 // Truncate to integer and convert back to FP.
3674 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3675 MVT XLenVT = Subtarget.getXLenVT();
3676 SDValue Truncated;
3677
3678 switch (Op.getOpcode()) {
3679 default:
3680 llvm_unreachable("Unexpected opcode");
3681 case ISD::FRINT:
3682 case ISD::VP_FRINT:
3683 case ISD::FCEIL:
3684 case ISD::VP_FCEIL:
3685 case ISD::FFLOOR:
3686 case ISD::VP_FFLOOR:
3687 case ISD::FROUND:
3688 case ISD::FROUNDEVEN:
3689 case ISD::VP_FROUND:
3690 case ISD::VP_FROUNDEVEN:
3691 case ISD::VP_FROUNDTOZERO: {
3694 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3695 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3696 break;
3697 }
3698 case ISD::FTRUNC:
3699 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3700 Mask, VL);
3701 break;
3702 case ISD::FNEARBYINT:
3703 case ISD::VP_FNEARBYINT:
3704 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3705 Mask, VL);
3706 break;
3707 }
3708
3709 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3710 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3711 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3712 Mask, VL);
3713
3714 // Restore the original sign so that -0.0 is preserved.
3715 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3716 Src, Src, Mask, VL);
3717
3718 if (!VT.isFixedLengthVector())
3719 return Truncated;
3720
3721 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3722}
3723
3724// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3725// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3726// qNan and converting the new source to integer and back to FP.
3727static SDValue
3729 const RISCVSubtarget &Subtarget) {
3730 SDLoc DL(Op);
3731 MVT VT = Op.getSimpleValueType();
3732 SDValue Chain = Op.getOperand(0);
3733 SDValue Src = Op.getOperand(1);
3734
3735 MVT ContainerVT = VT;
3736 if (VT.isFixedLengthVector()) {
3737 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3738 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3739 }
3740
3741 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3742
3743 // Freeze the source since we are increasing the number of uses.
3744 Src = DAG.getFreeze(Src);
3745
3746 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3747 MVT MaskVT = Mask.getSimpleValueType();
3748 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3749 DAG.getVTList(MaskVT, MVT::Other),
3750 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3751 DAG.getUNDEF(MaskVT), Mask, VL});
3752 Chain = Unorder.getValue(1);
3753 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3754 DAG.getVTList(ContainerVT, MVT::Other),
3755 {Chain, Src, Src, Src, Unorder, VL});
3756 Chain = Src.getValue(1);
3757
3758 // We do the conversion on the absolute value and fix the sign at the end.
3759 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3760
3761 // Determine the largest integer that can be represented exactly. This and
3762 // values larger than it don't have any fractional bits so don't need to
3763 // be converted.
3764 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3765 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3766 APFloat MaxVal = APFloat(FltSem);
3767 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3768 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3769 SDValue MaxValNode =
3770 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3771 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3772 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3773
3774 // If abs(Src) was larger than MaxVal or nan, keep it.
3775 Mask = DAG.getNode(
3776 RISCVISD::SETCC_VL, DL, MaskVT,
3777 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3778
3779 // Truncate to integer and convert back to FP.
3780 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3781 MVT XLenVT = Subtarget.getXLenVT();
3782 SDValue Truncated;
3783
3784 switch (Op.getOpcode()) {
3785 default:
3786 llvm_unreachable("Unexpected opcode");
3787 case ISD::STRICT_FCEIL:
3788 case ISD::STRICT_FFLOOR:
3789 case ISD::STRICT_FROUND:
3793 Truncated = DAG.getNode(
3794 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3795 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3796 break;
3797 }
3798 case ISD::STRICT_FTRUNC:
3799 Truncated =
3800 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3801 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3802 break;
3804 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3805 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3806 Mask, VL);
3807 break;
3808 }
3809 Chain = Truncated.getValue(1);
3810
3811 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3812 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3813 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3814 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3815 Truncated, Mask, VL);
3816 Chain = Truncated.getValue(1);
3817 }
3818
3819 // Restore the original sign so that -0.0 is preserved.
3820 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3821 Src, Src, Mask, VL);
3822
3823 if (VT.isFixedLengthVector())
3824 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3825 return DAG.getMergeValues({Truncated, Chain}, DL);
3826}
3827
3828static SDValue
3830 const RISCVSubtarget &Subtarget) {
3831 MVT VT = Op.getSimpleValueType();
3832 if (VT.isVector())
3833 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3834
3835 if (DAG.shouldOptForSize())
3836 return SDValue();
3837
3838 SDLoc DL(Op);
3839 SDValue Src = Op.getOperand(0);
3840
3841 // Create an integer the size of the mantissa with the MSB set. This and all
3842 // values larger than it don't have any fractional bits so don't need to be
3843 // converted.
3844 const fltSemantics &FltSem = VT.getFltSemantics();
3845 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3846 APFloat MaxVal = APFloat(FltSem);
3847 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3848 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3849 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3850
3852 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3853 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3854}
3855
3856// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3858 const RISCVSubtarget &Subtarget) {
3859 SDLoc DL(Op);
3860 MVT DstVT = Op.getSimpleValueType();
3861 SDValue Src = Op.getOperand(0);
3862 MVT SrcVT = Src.getSimpleValueType();
3863 assert(SrcVT.isVector() && DstVT.isVector() &&
3864 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3865 "Unexpected type");
3866
3867 MVT DstContainerVT = DstVT;
3868 MVT SrcContainerVT = SrcVT;
3869
3870 if (DstVT.isFixedLengthVector()) {
3871 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3872 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3873 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3874 }
3875
3876 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3877
3878 // [b]f16 -> f32
3879 MVT SrcElemType = SrcVT.getVectorElementType();
3880 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3881 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3882 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3883 }
3884
3885 SDValue Res =
3886 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3887 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3888 Subtarget.getXLenVT()),
3889 VL);
3890
3891 if (!DstVT.isFixedLengthVector())
3892 return Res;
3893
3894 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3895}
3896
3897static SDValue
3899 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3900 SDValue Offset, SDValue Mask, SDValue VL,
3902 if (Passthru.isUndef())
3904 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3905 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3906 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3907}
3908
3909static SDValue
3910getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3911 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3912 SDValue VL,
3914 if (Passthru.isUndef())
3916 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3917 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3918 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3919}
3920
3924 int64_t Addend;
3925};
3926
3927static std::optional<APInt> getExactInteger(const APFloat &APF,
3929 // We will use a SINT_TO_FP to materialize this constant so we should use a
3930 // signed APSInt here.
3931 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3932 // We use an arbitrary rounding mode here. If a floating-point is an exact
3933 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3934 // the rounding mode changes the output value, then it is not an exact
3935 // integer.
3937 bool IsExact;
3938 // If it is out of signed integer range, it will return an invalid operation.
3939 // If it is not an exact integer, IsExact is false.
3940 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3942 !IsExact)
3943 return std::nullopt;
3944 return ValInt.extractBits(BitWidth, 0);
3945}
3946
3947// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3948// to the (non-zero) step S and start value X. This can be then lowered as the
3949// RVV sequence (VID * S) + X, for example.
3950// The step S is represented as an integer numerator divided by a positive
3951// denominator. Note that the implementation currently only identifies
3952// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3953// cannot detect 2/3, for example.
3954// Note that this method will also match potentially unappealing index
3955// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3956// determine whether this is worth generating code for.
3957//
3958// EltSizeInBits is the size of the type that the sequence will be calculated
3959// in, i.e. SEW for build_vectors or XLEN for address calculations.
3960static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3961 unsigned EltSizeInBits) {
3962 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3964 return std::nullopt;
3965 bool IsInteger = Op.getValueType().isInteger();
3966
3967 std::optional<unsigned> SeqStepDenom;
3968 std::optional<APInt> SeqStepNum;
3969 std::optional<APInt> SeqAddend;
3970 std::optional<std::pair<APInt, unsigned>> PrevElt;
3971 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3972
3973 // First extract the ops into a list of constant integer values. This may not
3974 // be possible for floats if they're not all representable as integers.
3975 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3976 const unsigned OpSize = Op.getScalarValueSizeInBits();
3977 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3978 if (Elt.isUndef()) {
3979 Elts[Idx] = std::nullopt;
3980 continue;
3981 }
3982 if (IsInteger) {
3983 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3984 } else {
3985 auto ExactInteger =
3986 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3987 if (!ExactInteger)
3988 return std::nullopt;
3989 Elts[Idx] = *ExactInteger;
3990 }
3991 }
3992
3993 for (auto [Idx, Elt] : enumerate(Elts)) {
3994 // Assume undef elements match the sequence; we just have to be careful
3995 // when interpolating across them.
3996 if (!Elt)
3997 continue;
3998
3999 if (PrevElt) {
4000 // Calculate the step since the last non-undef element, and ensure
4001 // it's consistent across the entire sequence.
4002 unsigned IdxDiff = Idx - PrevElt->second;
4003 APInt ValDiff = *Elt - PrevElt->first;
4004
4005 // A zero-value value difference means that we're somewhere in the middle
4006 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
4007 // step change before evaluating the sequence.
4008 if (ValDiff == 0)
4009 continue;
4010
4011 int64_t Remainder = ValDiff.srem(IdxDiff);
4012 // Normalize the step if it's greater than 1.
4013 if (Remainder != ValDiff.getSExtValue()) {
4014 // The difference must cleanly divide the element span.
4015 if (Remainder != 0)
4016 return std::nullopt;
4017 ValDiff = ValDiff.sdiv(IdxDiff);
4018 IdxDiff = 1;
4019 }
4020
4021 if (!SeqStepNum)
4022 SeqStepNum = ValDiff;
4023 else if (ValDiff != SeqStepNum)
4024 return std::nullopt;
4025
4026 if (!SeqStepDenom)
4027 SeqStepDenom = IdxDiff;
4028 else if (IdxDiff != *SeqStepDenom)
4029 return std::nullopt;
4030 }
4031
4032 // Record this non-undef element for later.
4033 if (!PrevElt || PrevElt->first != *Elt)
4034 PrevElt = std::make_pair(*Elt, Idx);
4035 }
4036
4037 // We need to have logged a step for this to count as a legal index sequence.
4038 if (!SeqStepNum || !SeqStepDenom)
4039 return std::nullopt;
4040
4041 // Loop back through the sequence and validate elements we might have skipped
4042 // while waiting for a valid step. While doing this, log any sequence addend.
4043 for (auto [Idx, Elt] : enumerate(Elts)) {
4044 if (!Elt)
4045 continue;
4046 APInt ExpectedVal =
4047 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
4048 *SeqStepNum)
4049 .sdiv(*SeqStepDenom);
4050
4051 APInt Addend = *Elt - ExpectedVal;
4052 if (!SeqAddend)
4053 SeqAddend = Addend;
4054 else if (Addend != SeqAddend)
4055 return std::nullopt;
4056 }
4057
4058 assert(SeqAddend && "Must have an addend if we have a step");
4059
4060 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
4061 SeqAddend->getSExtValue()};
4062}
4063
4064// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
4065// and lower it as a VRGATHER_VX_VL from the source vector.
4066static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
4067 SelectionDAG &DAG,
4068 const RISCVSubtarget &Subtarget) {
4069 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4070 return SDValue();
4071 SDValue Src = SplatVal.getOperand(0);
4072 // Don't perform this optimization for i1 vectors, or if the element types are
4073 // different
4074 // FIXME: Support i1 vectors, maybe by promoting to i8?
4075 MVT EltTy = VT.getVectorElementType();
4076 if (EltTy == MVT::i1 ||
4077 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
4078 return SDValue();
4079 MVT SrcVT = Src.getSimpleValueType();
4080 if (EltTy != SrcVT.getVectorElementType())
4081 return SDValue();
4082 SDValue Idx = SplatVal.getOperand(1);
4083 // The index must be a legal type.
4084 if (Idx.getValueType() != Subtarget.getXLenVT())
4085 return SDValue();
4086
4087 // Check that we know Idx lies within VT
4088 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
4089 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
4090 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
4091 return SDValue();
4092 }
4093
4094 // Convert fixed length vectors to scalable
4095 MVT ContainerVT = VT;
4096 if (VT.isFixedLengthVector())
4097 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4098
4099 MVT SrcContainerVT = SrcVT;
4100 if (SrcVT.isFixedLengthVector()) {
4101 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4102 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4103 }
4104
4105 // Put Vec in a VT sized vector
4106 if (SrcContainerVT.getVectorMinNumElements() <
4107 ContainerVT.getVectorMinNumElements())
4108 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
4109 else
4110 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
4111
4112 // We checked that Idx fits inside VT earlier
4113 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4114 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
4115 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
4116 if (VT.isFixedLengthVector())
4117 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
4118 return Gather;
4119}
4120
4122 const RISCVSubtarget &Subtarget) {
4123 MVT VT = Op.getSimpleValueType();
4124 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4125
4126 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4127
4128 SDLoc DL(Op);
4129 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4130
4131 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
4132 int64_t StepNumerator = SimpleVID->StepNumerator;
4133 unsigned StepDenominator = SimpleVID->StepDenominator;
4134 int64_t Addend = SimpleVID->Addend;
4135
4136 assert(StepNumerator != 0 && "Invalid step");
4137 bool Negate = false;
4138 int64_t SplatStepVal = StepNumerator;
4139 unsigned StepOpcode = ISD::MUL;
4140 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
4141 // anyway as the shift of 63 won't fit in uimm5.
4142 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
4143 isPowerOf2_64(std::abs(StepNumerator))) {
4144 Negate = StepNumerator < 0;
4145 StepOpcode = ISD::SHL;
4146 SplatStepVal = Log2_64(std::abs(StepNumerator));
4147 }
4148
4149 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
4150 // since it's the immediate value many RVV instructions accept. There is
4151 // no vmul.vi instruction so ensure multiply constant can fit in a
4152 // single addi instruction. For the addend, we allow up to 32 bits..
4153 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
4154 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
4155 isPowerOf2_32(StepDenominator) &&
4156 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
4157 MVT VIDVT =
4159 MVT VIDContainerVT =
4160 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
4161 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
4162 // Convert right out of the scalable type so we can use standard ISD
4163 // nodes for the rest of the computation. If we used scalable types with
4164 // these, we'd lose the fixed-length vector info and generate worse
4165 // vsetvli code.
4166 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
4167 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
4168 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
4169 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
4170 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
4171 }
4172 if (StepDenominator != 1) {
4173 SDValue SplatStep =
4174 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
4175 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
4176 }
4177 if (Addend != 0 || Negate) {
4178 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
4179 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
4180 VID);
4181 }
4182 if (VT.isFloatingPoint()) {
4183 // TODO: Use vfwcvt to reduce register pressure.
4184 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
4185 }
4186 return VID;
4187 }
4188 }
4189
4190 return SDValue();
4191}
4192
4193/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
4194/// which constitute a large proportion of the elements. In such cases we can
4195/// splat a vector with the dominant element and make up the shortfall with
4196/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
4197/// Note that this includes vectors of 2 elements by association. The
4198/// upper-most element is the "dominant" one, allowing us to use a splat to
4199/// "insert" the upper element, and an insert of the lower element at position
4200/// 0, which improves codegen.
4202 const RISCVSubtarget &Subtarget) {
4203 MVT VT = Op.getSimpleValueType();
4204 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4205
4206 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4207
4208 SDLoc DL(Op);
4209 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4210
4211 MVT XLenVT = Subtarget.getXLenVT();
4212 unsigned NumElts = Op.getNumOperands();
4213
4214 SDValue DominantValue;
4215 unsigned MostCommonCount = 0;
4216 DenseMap<SDValue, unsigned> ValueCounts;
4217 unsigned NumUndefElts =
4218 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4219
4220 // Track the number of scalar loads we know we'd be inserting, estimated as
4221 // any non-zero floating-point constant. Other kinds of element are either
4222 // already in registers or are materialized on demand. The threshold at which
// a vector load is more desirable than several scalar materialization and
4224 // vector-insertion instructions is not known.
4225 unsigned NumScalarLoads = 0;
4226
4227 for (SDValue V : Op->op_values()) {
4228 if (V.isUndef())
4229 continue;
4230
4231 unsigned &Count = ValueCounts[V];
4232 if (0 == Count)
4233 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
4234 NumScalarLoads += !CFP->isExactlyValue(+0.0);
4235
4236 // Is this value dominant? In case of a tie, prefer the highest element as
4237 // it's cheaper to insert near the beginning of a vector than it is at the
4238 // end.
4239 if (++Count >= MostCommonCount) {
4240 DominantValue = V;
4241 MostCommonCount = Count;
4242 }
4243 }
4244
4245 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
4246 unsigned NumDefElts = NumElts - NumUndefElts;
4247 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
4248
4249 // Don't perform this optimization when optimizing for size, since
4250 // materializing elements and inserting them tends to cause code bloat.
4251 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
4252 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
4253 ((MostCommonCount > DominantValueCountThreshold) ||
4254 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
4255 // Start by splatting the most common element.
4256 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
4257
4258 DenseSet<SDValue> Processed{DominantValue};
4259
4260 // We can handle an insert into the last element (of a splat) via
4261 // v(f)slide1down. This is slightly better than the vslideup insert
4262 // lowering as it avoids the need for a vector group temporary. It
4263 // is also better than using vmerge.vx as it avoids the need to
4264 // materialize the mask in a vector register.
4265 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
4266 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
4267 LastOp != DominantValue) {
4268 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4269 auto OpCode =
4270 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4271 if (!VT.isFloatingPoint())
4272 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
4273 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4274 LastOp, Mask, VL);
4275 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
4276 Processed.insert(LastOp);
4277 }
4278
4279 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
4280 for (const auto &OpIdx : enumerate(Op->ops())) {
4281 const SDValue &V = OpIdx.value();
4282 if (V.isUndef() || !Processed.insert(V).second)
4283 continue;
4284 if (ValueCounts[V] == 1) {
4285 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
4286 } else {
4287 // Blend in all instances of this value using a VSELECT, using a
4288 // mask where each bit signals whether that element is the one
4289 // we're after.
4291 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
4292 return DAG.getConstant(V == V1, DL, XLenVT);
4293 });
4294 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
4295 DAG.getBuildVector(SelMaskTy, DL, Ops),
4296 DAG.getSplatBuildVector(VT, DL, V), Vec);
4297 }
4298 }
4299
4300 return Vec;
4301 }
4302
4303 return SDValue();
4304}
4305
4307 const RISCVSubtarget &Subtarget) {
4308 MVT VT = Op.getSimpleValueType();
4309 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4310
4311 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4312
4313 SDLoc DL(Op);
4314 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4315
4316 MVT XLenVT = Subtarget.getXLenVT();
4317 unsigned NumElts = Op.getNumOperands();
4318
4319 if (VT.getVectorElementType() == MVT::i1) {
4320 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4321 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4322 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4323 }
4324
4325 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4326 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4327 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4328 }
4329
4330 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4331 // scalar integer chunks whose bit-width depends on the number of mask
4332 // bits and XLEN.
4333 // First, determine the most appropriate scalar integer type to use. This
4334 // is at most XLenVT, but may be shrunk to a smaller vector element type
4335 // according to the size of the final vector - use i8 chunks rather than
4336 // XLenVT if we're producing a v8i1. This results in more consistent
4337 // codegen across RV32 and RV64.
4338 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4339 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4340 // If we have to use more than one INSERT_VECTOR_ELT then this
4341 // optimization is likely to increase code size; avoid performing it in
4342 // such a case. We can use a load from a constant pool in this case.
4343 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4344 return SDValue();
4345 // Now we can create our integer vector type. Note that it may be larger
4346 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4347 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4348 MVT IntegerViaVecVT =
4349 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4350 IntegerViaVecElts);
4351
4352 uint64_t Bits = 0;
4353 unsigned BitPos = 0, IntegerEltIdx = 0;
4354 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4355
4356 for (unsigned I = 0; I < NumElts;) {
4357 SDValue V = Op.getOperand(I);
4358 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4359 Bits |= ((uint64_t)BitValue << BitPos);
4360 ++BitPos;
4361 ++I;
4362
4363 // Once we accumulate enough bits to fill our scalar type or process the
4364 // last element, insert into our vector and clear our accumulated data.
4365 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4366 if (NumViaIntegerBits <= 32)
4367 Bits = SignExtend64<32>(Bits);
4368 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4369 Elts[IntegerEltIdx] = Elt;
4370 Bits = 0;
4371 BitPos = 0;
4372 IntegerEltIdx++;
4373 }
4374 }
4375
4376 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4377
4378 if (NumElts < NumViaIntegerBits) {
4379 // If we're producing a smaller vector than our minimum legal integer
4380 // type, bitcast to the equivalent (known-legal) mask type, and extract
4381 // our final mask.
4382 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4383 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4384 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4385 } else {
4386 // Else we must have produced an integer type with the same size as the
4387 // mask type; bitcast for the final result.
4388 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4389 Vec = DAG.getBitcast(VT, Vec);
4390 }
4391
4392 return Vec;
4393 }
4394
4396 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4397 : RISCVISD::VMV_V_X_VL;
4398 if (!VT.isFloatingPoint())
4399 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4400 Splat =
4401 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4402 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4403 }
4404
4405 // Try and match index sequences, which we can lower to the vid instruction
4406 // with optional modifications. An all-undef vector is matched by
4407 // getSplatValue, above.
4408 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4409 return Res;
4410
4411 // For very small build_vectors, use a single scalar insert of a constant.
4412 // TODO: Base this on constant rematerialization cost, not size.
4413 const unsigned EltBitSize = VT.getScalarSizeInBits();
4414 if (VT.getSizeInBits() <= 32 &&
4416 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4417 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4418 "Unexpected sequence type");
4419 // If we can use the original VL with the modified element type, this
4420 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4421 // be moved into InsertVSETVLI?
4422 unsigned ViaVecLen =
4423 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4424 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4425
4426 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4427 uint64_t SplatValue = 0;
4428 // Construct the amalgamated value at this larger vector type.
4429 for (const auto &OpIdx : enumerate(Op->op_values())) {
4430 const auto &SeqV = OpIdx.value();
4431 if (!SeqV.isUndef())
4432 SplatValue |=
4433 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4434 }
4435
4436 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4437 // achieve better constant materializion.
4438 // On RV32, we need to sign-extend to use getSignedConstant.
4439 if (ViaIntVT == MVT::i32)
4440 SplatValue = SignExtend64<32>(SplatValue);
4441
4442 SDValue Vec = DAG.getInsertVectorElt(
4443 DL, DAG.getUNDEF(ViaVecVT),
4444 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4445 if (ViaVecLen != 1)
4446 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4447 return DAG.getBitcast(VT, Vec);
4448 }
4449
4450
4451 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4452 // when re-interpreted as a vector with a larger element type. For example,
4453 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4454 // could be instead splat as
4455 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4456 // TODO: This optimization could also work on non-constant splats, but it
4457 // would require bit-manipulation instructions to construct the splat value.
4458 SmallVector<SDValue> Sequence;
4459 const auto *BV = cast<BuildVectorSDNode>(Op);
4460 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4462 BV->getRepeatedSequence(Sequence) &&
4463 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4464 unsigned SeqLen = Sequence.size();
4465 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4466 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4467 ViaIntVT == MVT::i64) &&
4468 "Unexpected sequence type");
4469
4470 // If we can use the original VL with the modified element type, this
4471 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4472 // be moved into InsertVSETVLI?
4473 const unsigned RequiredVL = NumElts / SeqLen;
4474 const unsigned ViaVecLen =
4475 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4476 NumElts : RequiredVL;
4477 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4478
4479 unsigned EltIdx = 0;
4480 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4481 uint64_t SplatValue = 0;
4482 // Construct the amalgamated value which can be splatted as this larger
4483 // vector type.
4484 for (const auto &SeqV : Sequence) {
4485 if (!SeqV.isUndef())
4486 SplatValue |=
4487 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4488 EltIdx++;
4489 }
4490
4491 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4492 // achieve better constant materializion.
4493 // On RV32, we need to sign-extend to use getSignedConstant.
4494 if (ViaIntVT == MVT::i32)
4495 SplatValue = SignExtend64<32>(SplatValue);
4496
4497 // Since we can't introduce illegal i64 types at this stage, we can only
4498 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4499 // way we can use RVV instructions to splat.
4500 assert((ViaIntVT.bitsLE(XLenVT) ||
4501 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4502 "Unexpected bitcast sequence");
4503 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4504 SDValue ViaVL =
4505 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4506 MVT ViaContainerVT =
4507 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4508 SDValue Splat =
4509 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4510 DAG.getUNDEF(ViaContainerVT),
4511 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4512 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4513 if (ViaVecLen != RequiredVL)
4515 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4516 return DAG.getBitcast(VT, Splat);
4517 }
4518 }
4519
4520 // If the number of signbits allows, see if we can lower as a <N x i8>.
4521 // Our main goal here is to reduce LMUL (and thus work) required to
4522 // build the constant, but we will also narrow if the resulting
4523 // narrow vector is known to materialize cheaply.
4524 // TODO: We really should be costing the smaller vector. There are
4525 // profitable cases this misses.
4526 if (EltBitSize > 8 && VT.isInteger() &&
4527 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4528 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4529 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4530 DL, Op->ops());
4531 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4532 Source, DAG, Subtarget);
4533 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4534 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4535 }
4536
4537 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4538 return Res;
4539
4540 // For constant vectors, use generic constant pool lowering. Otherwise,
4541 // we'd have to materialize constants in GPRs just to move them into the
4542 // vector.
4543 return SDValue();
4544}
4545
4546static unsigned getPACKOpcode(unsigned DestBW,
4547 const RISCVSubtarget &Subtarget) {
4548 switch (DestBW) {
4549 default:
4550 llvm_unreachable("Unsupported pack size");
4551 case 16:
4552 return RISCV::PACKH;
4553 case 32:
4554 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4555 case 64:
4556 assert(Subtarget.is64Bit());
4557 return RISCV::PACK;
4558 }
4559}
4560
4561/// Double the element size of the build vector to reduce the number
4562/// of vslide1down in the build vector chain. In the worst case, this
4563/// trades three scalar operations for 1 vector operation. Scalar
4564/// operations are generally lower latency, and for out-of-order cores
4565/// we also benefit from additional parallelism.
4567 const RISCVSubtarget &Subtarget) {
4568 SDLoc DL(Op);
4569 MVT VT = Op.getSimpleValueType();
4570 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4571 MVT ElemVT = VT.getVectorElementType();
4572 if (!ElemVT.isInteger())
4573 return SDValue();
4574
4575 // TODO: Relax these architectural restrictions, possibly with costing
4576 // of the actual instructions required.
4577 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4578 return SDValue();
4579
4580 unsigned NumElts = VT.getVectorNumElements();
4581 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4582 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4583 NumElts % 2 != 0)
4584 return SDValue();
4585
4586 // Produce [B,A] packed into a type twice as wide. Note that all
4587 // scalars are XLenVT, possibly masked (see below).
4588 MVT XLenVT = Subtarget.getXLenVT();
4589 SDValue Mask = DAG.getConstant(
4590 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4591 auto pack = [&](SDValue A, SDValue B) {
4592 // Bias the scheduling of the inserted operations to near the
4593 // definition of the element - this tends to reduce register
4594 // pressure overall.
4595 SDLoc ElemDL(B);
4596 if (Subtarget.hasStdExtZbkb())
4597 // Note that we're relying on the high bits of the result being
4598 // don't care. For PACKW, the result is *sign* extended.
4599 return SDValue(
4600 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4601 ElemDL, XLenVT, A, B),
4602 0);
4603
4604 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4605 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4606 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4607 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4608 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4610 };
4611
4612 SmallVector<SDValue> NewOperands;
4613 NewOperands.reserve(NumElts / 2);
4614 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4615 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4616 assert(NumElts == NewOperands.size() * 2);
4617 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4618 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4619 return DAG.getNode(ISD::BITCAST, DL, VT,
4620 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4621}
4622
4624 const RISCVSubtarget &Subtarget) {
4625 MVT VT = Op.getSimpleValueType();
4626 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4627
4628 MVT EltVT = VT.getVectorElementType();
4629 MVT XLenVT = Subtarget.getXLenVT();
4630
4631 SDLoc DL(Op);
4632
4633 if (Subtarget.isRV32() && Subtarget.hasStdExtP()) {
4634 if (VT != MVT::v4i8)
4635 return SDValue();
4636
4637 // <4 x i8> BUILD_VECTOR a, b, c, d -> PACK(PPACK.DH pair(a, c), pair(b, d))
4638 SDValue Val0 =
4639 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(0));
4640 SDValue Val1 =
4641 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(1));
4642 SDValue Val2 =
4643 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(2));
4644 SDValue Val3 =
4645 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(3));
4646 SDValue PPairDB =
4647 DAG.getNode(RISCVISD::PPAIRE_DB, DL, {MVT::v4i8, MVT::v4i8},
4648 {Val0, Val2, Val1, Val3});
4649
4650 return DAG.getNode(
4651 ISD::BITCAST, DL, MVT::v4i8,
4652 SDValue(
4653 DAG.getMachineNode(
4654 RISCV::PACK, DL, MVT::i32,
4655 {DAG.getNode(ISD::BITCAST, DL, MVT::i32, PPairDB.getValue(0)),
4656 DAG.getNode(ISD::BITCAST, DL, MVT::i32, PPairDB.getValue(1))}),
4657 0));
4658 }
4659
4660 // Proper support for f16 requires Zvfh. bf16 always requires special
4661 // handling. We need to cast the scalar to integer and create an integer
4662 // build_vector.
4663 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4664 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
4665 MVT IVT = VT.changeVectorElementType(MVT::i16);
4666 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4667 for (const auto &[I, U] : enumerate(Op->ops())) {
4668 SDValue Elem = U.get();
4669 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4670 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4671 // Called by LegalizeDAG, we need to use XLenVT operations since we
4672 // can't create illegal types.
4673 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4674 // Manually constant fold so the integer build_vector can be lowered
4675 // better. Waiting for DAGCombine will be too late.
4676 APInt V =
4677 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4678 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4679 } else {
4680 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4681 }
4682 } else {
4683 // Called by scalar type legalizer, we can use i16.
4684 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4685 }
4686 }
4687 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4688 return DAG.getBitcast(VT, Res);
4689 }
4690
4691 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4693 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4694
4695 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4696
4697 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4698
4699 if (VT.getVectorElementType() == MVT::i1) {
4700 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4701 // vector type, we have a legal equivalently-sized i8 type, so we can use
4702 // that.
4703 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4704 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4705
4706 SDValue WideVec;
4708 // For a splat, perform a scalar truncate before creating the wider
4709 // vector.
4710 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4711 DAG.getConstant(1, DL, Splat.getValueType()));
4712 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4713 } else {
4714 SmallVector<SDValue, 8> Ops(Op->op_values());
4715 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4716 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4717 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4718 }
4719
4720 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4721 }
4722
4724 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4725 return Gather;
4726
4727 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4728 // pressure at high LMUL.
4729 if (all_of(Op->ops().drop_front(),
4730 [](const SDUse &U) { return U.get().isUndef(); })) {
4731 unsigned Opc =
4732 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4733 if (!VT.isFloatingPoint())
4734 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4735 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4736 Splat, VL);
4737 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4738 }
4739
4740 unsigned Opc =
4741 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4742 if (!VT.isFloatingPoint())
4743 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4744 Splat =
4745 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4746 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4747 }
4748
4749 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4750 return Res;
4751
4752 // If we're compiling for an exact VLEN value, we can split our work per
4753 // register in the register group.
4754 if (const auto VLen = Subtarget.getRealVLen();
4755 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4756 MVT ElemVT = VT.getVectorElementType();
4757 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4758 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4759 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4760 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4761 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4762
4763 // The following semantically builds up a fixed length concat_vector
4764 // of the component build_vectors. We eagerly lower to scalable and
4765 // insert_subvector here to avoid DAG combining it back to a large
4766 // build_vector.
4767 SmallVector<SDValue> BuildVectorOps(Op->ops());
4768 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4769 SDValue Vec = DAG.getUNDEF(ContainerVT);
4770 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4771 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4772 SDValue SubBV =
4773 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4774 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4775 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4776 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4777 }
4778 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4779 }
4780
4781 // If we're about to resort to vslide1down (or stack usage), pack our
4782 // elements into the widest scalar type we can. This will force a VL/VTYPE
4783 // toggle, but reduces the critical path, the number of vslide1down ops
4784 // required, and possibly enables scalar folds of the values.
4785 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4786 return Res;
4787
4788 // For m1 vectors, if we have non-undef values in both halves of our vector,
4789 // split the vector into low and high halves, build them separately, then
4790 // use a vselect to combine them. For long vectors, this cuts the critical
4791 // path of the vslide1down sequence in half, and gives us an opportunity
4792 // to special case each half independently. Note that we don't change the
4793 // length of the sub-vectors here, so if both fallback to the generic
4794 // vslide1down path, we should be able to fold the vselect into the final
4795 // vslidedown (for the undef tail) for the first half w/ masking.
4796 unsigned NumElts = VT.getVectorNumElements();
4797 unsigned NumUndefElts =
4798 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4799 unsigned NumDefElts = NumElts - NumUndefElts;
4800 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4801 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4802 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4803 SmallVector<SDValue> MaskVals;
4804 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4805 SubVecAOps.reserve(NumElts);
4806 SubVecBOps.reserve(NumElts);
4807 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4808 SDValue Elem = U.get();
4809 if (Idx < NumElts / 2) {
4810 SubVecAOps.push_back(Elem);
4811 SubVecBOps.push_back(UndefElem);
4812 } else {
4813 SubVecAOps.push_back(UndefElem);
4814 SubVecBOps.push_back(Elem);
4815 }
4816 bool SelectMaskVal = (Idx < NumElts / 2);
4817 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4818 }
4819 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4820 MaskVals.size() == NumElts);
4821
4822 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4823 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4824 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4825 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4826 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4827 }
4828
4829 // Cap the cost at a value linear to the number of elements in the vector.
4830 // The default lowering is to use the stack. The vector store + scalar loads
4831 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4832 // being (at least) linear in LMUL. As a result, using the vslidedown
4833 // lowering for every element ends up being VL*LMUL..
4834 // TODO: Should we be directly costing the stack alternative? Doing so might
4835 // give us a more accurate upper bound.
4836 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4837
4838 // TODO: unify with TTI getSlideCost.
4839 InstructionCost PerSlideCost = 1;
4840 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4841 default: break;
4842 case RISCVVType::LMUL_2:
4843 PerSlideCost = 2;
4844 break;
4845 case RISCVVType::LMUL_4:
4846 PerSlideCost = 4;
4847 break;
4848 case RISCVVType::LMUL_8:
4849 PerSlideCost = 8;
4850 break;
4851 }
4852
4853 // TODO: Should we be using the build instseq then cost + evaluate scheme
4854 // we use for integer constants here?
4855 unsigned UndefCount = 0;
4856 for (const SDValue &V : Op->ops()) {
4857 if (V.isUndef()) {
4858 UndefCount++;
4859 continue;
4860 }
4861 if (UndefCount) {
4862 LinearBudget -= PerSlideCost;
4863 UndefCount = 0;
4864 }
4865 LinearBudget -= PerSlideCost;
4866 }
4867 if (UndefCount) {
4868 LinearBudget -= PerSlideCost;
4869 }
4870
4871 if (LinearBudget < 0)
4872 return SDValue();
4873
4874 assert((!VT.isFloatingPoint() ||
4875 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4876 "Illegal type which will result in reserved encoding");
4877
4878 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4879
4880 // General case: splat the first operand and slide other operands down one
4881 // by one to form a vector. Alternatively, if every operand is an
4882 // extraction from element 0 of a vector, we use that vector from the last
4883 // extraction as the start value and slide up instead of slide down. Such that
4884 // (1) we can avoid the initial splat (2) we can turn those vslide1up into
4885 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4886 // something we cannot do with vslide1down/vslidedown.
4887 // Of course, using vslide1up/vslideup might increase the register pressure,
4888 // and that's why we conservatively limit to cases where every operand is an
4889 // extraction from the first element.
4890 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4891 SDValue EVec;
4892 bool SlideUp = false;
4893 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4894 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4895 if (SlideUp)
4896 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4897 Mask, VL, Policy);
4898 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4899 Mask, VL, Policy);
4900 };
4901
4902 // The reason we don't use all_of here is because we're also capturing EVec
4903 // from the last non-undef operand. If the std::execution_policy of the
4904 // underlying std::all_of is anything but std::sequenced_policy we might
4905 // capture the wrong EVec.
4906 for (SDValue V : Operands) {
4907 using namespace SDPatternMatch;
4908 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4909 if (!SlideUp)
4910 break;
4911 }
4912
4913 // Do not slideup if the element type of EVec is different.
4914 if (SlideUp) {
4915 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4916 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4917 if (EVecEltVT != ContainerEltVT)
4918 SlideUp = false;
4919 }
4920
4921 if (SlideUp) {
4922 MVT EVecContainerVT = EVec.getSimpleValueType();
4923 // Make sure the original vector has scalable vector type.
4924 if (EVecContainerVT.isFixedLengthVector()) {
4925 EVecContainerVT =
4926 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4927 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4928 }
4929
4930 // Adapt EVec's type into ContainerVT.
4931 if (EVecContainerVT.getVectorMinNumElements() <
4932 ContainerVT.getVectorMinNumElements())
4933 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4934 else
4935 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4936
4937 // Reverse the elements as we're going to slide up from the last element.
4938 std::reverse(Operands.begin(), Operands.end());
4939 }
4940
4941 SDValue Vec;
4942 UndefCount = 0;
4943 for (SDValue V : Operands) {
4944 if (V.isUndef()) {
4945 UndefCount++;
4946 continue;
4947 }
4948
4949 // Start our sequence with either a TA splat or extract source in the
4950 // hopes that hardware is able to recognize there's no dependency on the
4951 // prior value of our temporary register.
4952 if (!Vec) {
4953 if (SlideUp) {
4954 Vec = EVec;
4955 } else {
4956 Vec = DAG.getSplatVector(VT, DL, V);
4957 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4958 }
4959
4960 UndefCount = 0;
4961 continue;
4962 }
4963
4964 if (UndefCount) {
4965 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4966 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4967 VL);
4968 UndefCount = 0;
4969 }
4970
4971 unsigned Opcode;
4972 if (VT.isFloatingPoint())
4973 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4974 else
4975 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4976
4977 if (!VT.isFloatingPoint())
4978 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4979 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4980 V, Mask, VL);
4981 }
4982 if (UndefCount) {
4983 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4984 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4985 VL);
4986 }
4987 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4988}
4989
4990static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4992 SelectionDAG &DAG) {
4993 if (!Passthru)
4994 Passthru = DAG.getUNDEF(VT);
4996 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4997 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4998 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4999 // node in order to try and match RVV vector/scalar instructions.
5000 if ((LoC >> 31) == HiC)
5001 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5002
5003 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
5004 // VL. This can temporarily increase VL if VL less than VLMAX.
5005 if (LoC == HiC) {
5006 SDValue NewVL;
5007 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
5008 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
5009 else
5010 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
5011 MVT InterVT =
5012 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
5013 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
5014 DAG.getUNDEF(InterVT), Lo, NewVL);
5015 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
5016 }
5017 }
5018
5019 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
5020 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
5021 isa<ConstantSDNode>(Hi.getOperand(1)) &&
5022 Hi.getConstantOperandVal(1) == 31)
5023 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5024
5025 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
5026 // even if it might be sign extended.
5027 if (Hi.isUndef())
5028 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5029
5030 // Fall back to a stack store and stride x0 vector load.
5031 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
5032 Hi, VL);
5033}
5034
5035// Called by type legalization to handle splat of i64 on RV32.
5036// FIXME: We can optimize this when the type has sign or zero bits in one
5037// of the halves.
5038static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
5039 SDValue Scalar, SDValue VL,
5040 SelectionDAG &DAG) {
5041 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
5042 SDValue Lo, Hi;
5043 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
5044 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
5045}
5046
5047 // This function lowers a splat of a scalar operand Splat with the vector
5048 // length VL. It ensures the final sequence is type legal, which is useful when
5049 // lowering a splat after type legalization.
5050 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
5051 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
5052 const RISCVSubtarget &Subtarget) {
// A null Passthru is normalized to undef so downstream node creation always
// has a passthru operand.
5053 bool HasPassthru = Passthru && !Passthru.isUndef();
5054 if (!HasPassthru && !Passthru)
5055 Passthru = DAG.getUNDEF(VT);
5056
5057 MVT EltVT = VT.getVectorElementType();
5058 MVT XLenVT = Subtarget.getXLenVT();
5059
5060 if (VT.isFloatingPoint()) {
// f16/bf16 splats without the corresponding vector FP extension are done as
// i16 integer splats of the scalar's bit pattern, then bitcast back to VT.
5061 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
5062 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
// With scalar Zfhmin/Zfbfmin the bits can be moved with fmv.x.h; otherwise
// fall back to a plain bitcast of the scalar to i16.
5063 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
5064 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
5065 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
5066 else
5067 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
5068 MVT IVT = VT.changeVectorElementType(MVT::i16);
5069 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
5070 SDValue Splat =
5071 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
5072 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
5073 }
5074 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
5075 }
5076
5077 // Simplest case is that the operand needs to be promoted to XLenVT.
5078 if (Scalar.getValueType().bitsLE(XLenVT)) {
5079 // If the operand is a constant, sign extend to increase our chances
5080 // of being able to use a .vi instruction. ANY_EXTEND would become a
5081 // a zero extend and the simm5 check in isel would fail.
5082 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the initializer of ExtOpc is not visible in this copy of the
// file; it presumably selects SIGN_EXTEND for constants — confirm upstream.
5083 unsigned ExtOpc =
5085 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
5086 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
5087 }
5088
// Past this point we are splatting an i64 scalar on an RV32 target.
5089 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
5090 "Unexpected scalar for splat lowering!");
5091
// Splat-of-zero with VL=1 can use a single vmv.s.x of zero.
5092 if (isOneConstant(VL) && isNullConstant(Scalar))
5093 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
5094 DAG.getConstant(0, DL, XLenVT), VL);
5095
5096 // Otherwise use the more complicated splatting algorithm.
5097 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
5098 }
5099
5100 // This function lowers an insert of a scalar operand Scalar into lane
5101 // 0 of the vector regardless of the value of VL. The contents of the
5102 // remaining lanes of the result vector are unspecified. VL is assumed
5103 // to be non-zero.
// NOTE(review): the first line of this function's signature is missing from
// this copy of the file (the function takes Scalar, VL and VT in addition to
// the parameters visible below) — confirm against upstream.
5105 const SDLoc &DL, SelectionDAG &DAG,
5106 const RISCVSubtarget &Subtarget) {
5107 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
5108
5109 const MVT XLenVT = Subtarget.getXLenVT();
5110 SDValue Passthru = DAG.getUNDEF(VT);
5111
// If the scalar itself was just extracted from lane 0 of a vector with the
// same element type, reuse that vector directly via subvector insert/extract
// instead of materializing a scalar move.
5112 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5113 isNullConstant(Scalar.getOperand(1))) {
5114 SDValue ExtractedVal = Scalar.getOperand(0);
5115 // The element types must be the same.
5116 if (ExtractedVal.getValueType().getVectorElementType() ==
5117 VT.getVectorElementType()) {
5118 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
5119 MVT ExtractedContainerVT = ExtractedVT;
5120 if (ExtractedContainerVT.isFixedLengthVector()) {
5121 ExtractedContainerVT = getContainerForFixedLengthVector(
5122 DAG, ExtractedContainerVT, Subtarget);
5123 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
5124 ExtractedVal, DAG, Subtarget);
5125 }
// Insert when the source fits inside VT, otherwise extract the VT-sized
// prefix of the (larger) source.
5126 if (ExtractedContainerVT.bitsLE(VT))
5127 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
5128 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
5129 }
5130 }
5131
5132 if (VT.isFloatingPoint())
5133 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5134 VL);
5135
5136 // Avoid the tricky legalization cases by falling back to using the
5137 // splat code which already handles it gracefully.
5138 if (!Scalar.getValueType().bitsLE(XLenVT))
5139 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
5140 DAG.getConstant(1, DL, XLenVT),
5141 VT, DL, DAG, Subtarget);
5142
5143 // If the operand is a constant, sign extend to increase our chances
5144 // of being able to use a .vi instruction. ANY_EXTEND would become a
5145 // a zero extend and the simm5 check in isel would fail.
5146 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the initializer of ExtOpc is not visible in this copy of the
// file; it presumably selects SIGN_EXTEND for constants — confirm upstream.
5147 unsigned ExtOpc =
5149 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
5150 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5151 VL);
5152 }
5153
5154 /// If concat_vector(V1,V2) could be folded away to some existing
5155 /// vector source, return it. Note that the source may be larger
5156 /// than the requested concat_vector (i.e. a extract_subvector
5157 /// might be required.)
// NOTE(review): the signature line (taking V1 and V2) is missing from this
// copy of the file — confirm against upstream.
5159 EVT VT = V1.getValueType();
5160 assert(VT == V2.getValueType() && "argument types must match");
5161 // Both input must be extracts.
// NOTE(review): the condition's second line (the check on V2's opcode) is
// missing from this copy of the file.
5162 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
5164 return SDValue();
5165
5166 // Extracting from the same source.
// Mixing a scalable source with a fixed-length concat (or vice versa) is
// rejected since the halves would not line up.
5167 SDValue Src = V1.getOperand(0);
5168 if (Src != V2.getOperand(0) ||
5169 VT.isScalableVector() != Src.getValueType().isScalableVector())
5170 return SDValue();
5171
5172 // The extracts must extract the two halves of the source.
// NOTE(review): the condition's second line (the check on V2's extract
// index) is missing from this copy of the file.
5173 if (V1.getConstantOperandVal(1) != 0 ||
5175 return SDValue();
5176
5177 return Src;
5178 }
5179
5180 // Can this shuffle be performed on exactly one (possibly larger) input?
// NOTE(review): the signature line of this helper is missing from this copy
// of the file (it takes VT, V1 and V2) — confirm against upstream.
5182
// A shuffle with an undef second operand trivially has a single source.
5183 if (V2.isUndef())
5184 return V1;
5185
5186 unsigned NumElts = VT.getVectorNumElements();
5187 // Src needs to have twice the number of elements.
5188 // TODO: Update shuffle lowering to add the extract subvector
// If V1/V2 are the two halves of one existing vector, that vector is the
// single (double-width) source.
5189 if (SDValue Src = foldConcatVector(V1, V2);
5190 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
5191 return Src;
5192
5193 return SDValue();
5194 }
5195
5196/// Is this shuffle interleaving contiguous elements from one vector into the
5197/// even elements and contiguous elements from another vector into the odd
5198/// elements. \p EvenSrc will contain the element that should be in the first
5199/// even element. \p OddSrc will contain the element that should be in the first
5200/// odd element. These can be the first element in a source or the element half
5201/// way through the source.
5202static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
5203 int &OddSrc, const RISCVSubtarget &Subtarget) {
5204 // We need to be able to widen elements to the next larger integer type or
5205 // use the zip2a instruction at e64.
5206 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
5207 !Subtarget.hasVendorXRivosVizip())
5208 return false;
5209
5210 int Size = Mask.size();
5211 int NumElts = VT.getVectorNumElements();
5212 assert(Size == (int)NumElts && "Unexpected mask size");
5213
5214 SmallVector<unsigned, 2> StartIndexes;
5215 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
5216 return false;
5217
5218 EvenSrc = StartIndexes[0];
5219 OddSrc = StartIndexes[1];
5220
5221 // One source should be low half of first vector.
5222 if (EvenSrc != 0 && OddSrc != 0)
5223 return false;
5224
5225 // Subvectors will be subtracted from either at the start of the two input
5226 // vectors, or at the start and middle of the first vector if it's an unary
5227 // interleave.
5228 // In both cases, HalfNumElts will be extracted.
5229 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
5230 // we'll create an illegal extract_subvector.
5231 // FIXME: We could support other values using a slidedown first.
5232 int HalfNumElts = NumElts / 2;
5233 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
5234}
5235
5236 /// Is this mask representing a masked combination of two slides?
// NOTE(review): the first line of the signature (taking the Mask) is missing
// from this copy of the file — confirm against upstream.
5238 std::array<std::pair<int, int>, 2> &SrcInfo) {
// Delegate the raw structural match to the shared utility; SrcInfo receives
// (source, offset) pairs for the two slides on success.
5239 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
5240 return false;
5241
5242 // Avoid matching vselect idioms
5243 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
5244 return false;
5245 // Prefer vslideup as the second instruction, and identity
5246 // only as the initial instruction.
5247 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
5248 SrcInfo[1].second == 0)
5249 std::swap(SrcInfo[0], SrcInfo[1]);
5250 assert(SrcInfo[0].first != -1 && "Must find one slide");
5251 return true;
5252 }
5253
5254// Exactly matches the semantics of a previously existing custom matcher
5255// to allow migration to new matcher without changing output.
5256static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
5257 unsigned NumElts) {
5258 if (SrcInfo[1].first == -1)
5259 return true;
5260 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
5261 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
5262}
5263
5264static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
5265 ArrayRef<int> Mask, unsigned Factor,
5266 bool RequiredPolarity) {
5267 int NumElts = Mask.size();
5268 for (const auto &[Idx, M] : enumerate(Mask)) {
5269 if (M < 0)
5270 continue;
5271 int Src = M >= NumElts;
5272 int Diff = (int)Idx - (M % NumElts);
5273 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
5274 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
5275 "Must match exactly one of the two slides");
5276 if (RequiredPolarity != (C == (Idx / Factor) % 2))
5277 return false;
5278 }
5279 return true;
5280}
5281
5282/// Given a shuffle which can be represented as a pair of two slides,
5283/// see if it is a zipeven idiom. Zipeven is:
5284/// vs2: a0 a1 a2 a3
5285/// vs1: b0 b1 b2 b3
5286/// vd: a0 b0 a2 b2
5287static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
5288 ArrayRef<int> Mask, unsigned &Factor) {
5289 Factor = SrcInfo[1].second;
5290 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5291 Mask.size() % Factor == 0 &&
5292 isAlternating(SrcInfo, Mask, Factor, true);
5293}
5294
5295/// Given a shuffle which can be represented as a pair of two slides,
5296/// see if it is a zipodd idiom. Zipodd is:
5297/// vs2: a0 a1 a2 a3
5298/// vs1: b0 b1 b2 b3
5299/// vd: a1 b1 a3 b3
5300/// Note that the operand order is swapped due to the way we canonicalize
5301/// the slides, so SrCInfo[0] is vs1, and SrcInfo[1] is vs2.
5302static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
5303 ArrayRef<int> Mask, unsigned &Factor) {
5304 Factor = -SrcInfo[1].second;
5305 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5306 Mask.size() % Factor == 0 &&
5307 isAlternating(SrcInfo, Mask, Factor, false);
5308}
5309
5310 // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
5311 // 2, 4, 8 and the integer type Factor-times larger than VT's
5312 // element type must be a legal element type.
5313 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
5314 // -> [p, q, r, s] (Factor=2, Index=1)
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5316 SDValue Src, unsigned Factor,
5317 unsigned Index, SelectionDAG &DAG) {
// View the source as a vector of Factor-times-wider integer elements; each
// wide element then holds one whole group of Factor original elements.
5318 unsigned EltBits = VT.getScalarSizeInBits();
5319 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
5320 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
5321 SrcEC.divideCoefficientBy(Factor));
5322 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
5323 SrcEC.divideCoefficientBy(Factor));
5324 Src = DAG.getBitcast(WideSrcVT, Src);
5325
// Shift the requested lane of each group down to bit 0, then truncate the
// wide elements back to the original element width.
5326 unsigned Shift = Index * EltBits;
5327 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
5328 DAG.getConstant(Shift, DL, WideSrcVT));
5329 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
// NOTE(review): the declaration of CastVT is missing from this copy of the
// file — confirm its definition against upstream.
5331 Res = DAG.getBitcast(CastVT, Res);
5332 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
5333 }
5334
5335 /// Match a single source shuffle which is an identity except that some
5336 /// particular element is repeated. This can be lowered as a masked
5337 /// vrgather.vi/vx. Note that the two source form of this is handled
5338 /// by the recursive splitting logic and doesn't need special handling.
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5340 const RISCVSubtarget &Subtarget,
5341 SelectionDAG &DAG) {
5342
5343 SDLoc DL(SVN);
5344 MVT VT = SVN->getSimpleValueType(0);
5345 SDValue V1 = SVN->getOperand(0);
5346 assert(SVN->getOperand(1).isUndef());
5347 ArrayRef<int> Mask = SVN->getMask();
5348 const unsigned NumElts = VT.getVectorNumElements();
5349 MVT XLenVT = Subtarget.getXLenVT();
5350
// Every non-identity, non-undef lane must read the same source element;
// remember that element as the splat index.
5351 std::optional<int> SplatIdx;
5352 for (auto [I, M] : enumerate(Mask)) {
5353 if (M == -1 || I == (unsigned)M)
5354 continue;
5355 if (SplatIdx && *SplatIdx != M)
5356 return SDValue();
5357 SplatIdx = M;
5358 }
5359
// A pure identity (no repeated element at all) is not this pattern.
5360 if (!SplatIdx)
5361 return SDValue();
5362
// Build a select mask that is true exactly where the splatted element
// belongs, then vselect between a full splat shuffle and the original.
5363 SmallVector<SDValue> MaskVals;
5364 for (int MaskIndex : Mask) {
5365 bool SelectMaskVal = MaskIndex == *SplatIdx;
5366 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5367 }
5368 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5369 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5370 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5371 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5372 SmallVector<int>(NumElts, *SplatIdx));
5373 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5374 }
5375
5376 // Lower the following shuffle to vslidedown.
5377 // a)
5378 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
5379 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
5380 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5381 // b)
5382 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
5383 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
5384 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
5385 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
5386 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
5387 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5389 SDValue V1, SDValue V2,
5390 ArrayRef<int> Mask,
5391 const RISCVSubtarget &Subtarget,
5392 SelectionDAG &DAG) {
// Walk up a chain of fixed-length EXTRACT_SUBVECTORs, accumulating the
// total element offset into the ultimate (non-extract) parent.
5393 auto findNonEXTRACT_SUBVECTORParent =
5394 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5395 uint64_t Offset = 0;
5396 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5397 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5398 // a scalable vector. But we don't want to match the case.
5399 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5400 Offset += Parent.getConstantOperandVal(1);
5401 Parent = Parent.getOperand(0);
5402 }
5403 return std::make_pair(Parent, Offset);
5404 };
5405
5406 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5407 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5408
5409 // Extracting from the same source.
5410 SDValue Src = V1Src;
5411 if (Src != V2Src)
5412 return SDValue();
5413
5414 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5415 SmallVector<int, 16> NewMask(Mask);
5416 for (size_t i = 0; i != NewMask.size(); ++i) {
5417 if (NewMask[i] == -1)
5418 continue;
5419
5420 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5421 NewMask[i] = NewMask[i] + V1IndexOffset;
5422 } else {
5423 // Minus NewMask.size() is needed. Otherwise, the b case would be
5424 // <5,6,7,12> instead of <5,6,7,8>.
5425 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5426 }
5427 }
5428
5429 // First index must be known and non-zero. It will be used as the slidedown
5430 // amount.
5431 if (NewMask[0] <= 0)
5432 return SDValue();
5433
5434 // NewMask is also continuous.
5435 for (unsigned i = 1; i != NewMask.size(); ++i)
5436 if (NewMask[i - 1] + 1 != NewMask[i])
5437 return SDValue();
5438
// Perform the slidedown on the full source, then extract the VT-sized
// prefix of the result.
5439 MVT XLenVT = Subtarget.getXLenVT();
5440 MVT SrcVT = Src.getSimpleValueType();
5441 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5442 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5443 SDValue Slidedown =
5444 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5445 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5446 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5447 return DAG.getExtractSubvector(
5448 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5449 }
5450
5451 // Because vslideup leaves the destination elements at the start intact, we can
5452 // use it to perform shuffles that insert subvectors:
5453 //
5454 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5455 // ->
5456 // vsetvli zero, 8, e8, mf2, ta, ma
5457 // vslideup.vi v8, v9, 4
5458 //
5459 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5460 // ->
5461 // vsetvli zero, 5, e8, mf2, tu, ma
5462 // vslideup.v1 v8, v9, 2
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5464 SDValue V1, SDValue V2,
5465 ArrayRef<int> Mask,
5466 const RISCVSubtarget &Subtarget,
5467 SelectionDAG &DAG) {
5468 unsigned NumElts = VT.getVectorNumElements();
5469 int NumSubElts, Index;
5470 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5471 Index))
5472 return SDValue();
5473
// Determine which operand is the vector being inserted into (in place) and
// which supplies the inserted subvector.
5474 bool OpsSwapped = Mask[Index] < (int)NumElts;
5475 SDValue InPlace = OpsSwapped ? V2 : V1;
5476 SDValue ToInsert = OpsSwapped ? V1 : V2;
5477
5478 MVT XLenVT = Subtarget.getXLenVT();
5479 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5480 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5481 // We slide up by the index that the subvector is being inserted at, and set
5482 // VL to the index + the number of elements being inserted.
// NOTE(review): the initializer of Policy is missing from this copy of the
// file — confirm its base value against upstream.
5483 unsigned Policy =
5485 // If the we're adding a suffix to the in place vector, i.e. inserting right
5486 // up to the very end of it, then we don't actually care about the tail.
5487 if (NumSubElts + Index >= (int)NumElts)
5488 Policy |= RISCVVType::TAIL_AGNOSTIC;
5489
5490 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5491 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5492 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5493
5494 SDValue Res;
5495 // If we're inserting into the lowest elements, use a tail undisturbed
5496 // vmv.v.v.
5497 if (Index == 0)
5498 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5499 VL);
5500 else
5501 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5502 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5503 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5504 }
5505
5506 // A shuffle of shuffles where the final data only is drawn from 2 input ops
5507 // can be compressed into a single shuffle
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5509 const RISCVSubtarget &Subtarget,
5510 SelectionDAG &DAG) {
5511 SDValue V1 = SVN->getOperand(0);
5512 SDValue V2 = SVN->getOperand(1);
5513
// NOTE(review): the condition's second line (the check on V2's opcode) is
// missing from this copy of the file.
5514 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE ||
5516 return SDValue();
5517
// Only fold when the inner shuffles have no other users, so we don't
// duplicate work.
5518 if (!V1.hasOneUse() || !V2.hasOneUse())
5519 return SDValue();
5520
5521 ArrayRef<int> Mask = SVN->getMask();
5522 ArrayRef<int> V1Mask = cast<ShuffleVectorSDNode>(V1.getNode())->getMask();
5523 ArrayRef<int> V2Mask = cast<ShuffleVectorSDNode>(V2.getNode())->getMask();
5524 unsigned NumElts = Mask.size();
// Compose the outer mask with the inner masks, tracking which inner
// operand each element ultimately comes from.
5525 SmallVector<int> NewMask(NumElts, -1);
5526 for (unsigned Idx : seq<unsigned>(NumElts)) {
5527 int Lane = Mask[Idx];
5528 // Don't assign if poison
5529 if (Lane == -1)
5530 continue;
5531 int OrigLane;
5532 bool SecondOp = false;
5533 if ((unsigned)Lane < NumElts) {
5534 OrigLane = V1Mask[Lane];
5535 } else {
5536 OrigLane = V2Mask[Lane - NumElts];
5537 SecondOp = true;
5538 }
5539 if (OrigLane == -1)
5540 continue;
5541 // Don't handle if shuffling from a second operand
5542 if ((unsigned)OrigLane >= NumElts)
5543 return SDValue();
5544 if (SecondOp)
5545 OrigLane += NumElts;
5546 NewMask[Idx] = OrigLane;
5547 }
5548
5549 MVT VT = SVN->getSimpleValueType(0);
5550 SDLoc DL(SVN);
5551
5552 return DAG.getVectorShuffle(VT, DL, V1->getOperand(0), V2->getOperand(0),
5553 NewMask);
5554 }
5555
5556 /// Match v(f)slide1up/down idioms. These operations involve sliding
5557 /// N-1 elements to make room for an inserted scalar at one end.
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5559 SDValue V1, SDValue V2,
5560 ArrayRef<int> Mask,
5561 const RISCVSubtarget &Subtarget,
5562 SelectionDAG &DAG) {
// Canonicalize so V1 is the BUILD_VECTOR providing the inserted scalar.
5563 bool OpsSwapped = false;
5564 if (!isa<BuildVectorSDNode>(V1)) {
5565 if (!isa<BuildVectorSDNode>(V2))
5566 return SDValue();
5567 std::swap(V1, V2);
5568 OpsSwapped = true;
5569 }
5570 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5571 if (!Splat)
5572 return SDValue();
5573
5574 // Return true if the mask could describe a slide of Mask.size() - 1
5575 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5576 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5577 const unsigned S = (Offset > 0) ? 0 : -Offset;
5578 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5579 for (unsigned i = S; i != E; ++i)
5580 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5581 return false;
5582 return true;
5583 };
5584
5585 const unsigned NumElts = VT.getVectorNumElements();
5586 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5587 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5588 return SDValue();
5589
5590 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5591 // Inserted lane must come from splat, undef scalar is legal but not profitable.
5592 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5593 return SDValue();
5594
5595 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5596 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5597
5598 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5599 // vslide1{down,up}.vx instead.
5600 if (VT.getVectorElementType() == MVT::bf16 ||
5601 (VT.getVectorElementType() == MVT::f16 &&
5602 !Subtarget.hasVInstructionsF16())) {
5603 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5604 Splat =
5605 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5606 V2 = DAG.getBitcast(
5607 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5608 SDValue Vec = DAG.getNode(
5609 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5610 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5611 Vec = DAG.getBitcast(ContainerVT, Vec);
5612 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5613 }
5614
// Pick the FP or integer slide1 opcode; integer scalars are any-extended to
// XLen first since the .vx forms take an XLen-width scalar.
5615 auto OpCode = IsVSlidedown ?
5616 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5617 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5618 if (!VT.isFloatingPoint())
5619 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5620 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5621 DAG.getUNDEF(ContainerVT),
5622 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5623 Splat, TrueMask, VL);
5624 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5625 }
5626
5627 /// Match a mask which "spreads" the leading elements of a vector evenly
5628 /// across the result. Factor is the spread amount, and Index is the
5629 /// offset applied. (on success, Index < Factor) This is the inverse
5630 /// of a deinterleave with the same Factor and Index. This is analogous
5631 /// to an interleave, except that all but one lane is undef.
// NOTE(review): the first line of the signature (taking the Mask and Factor)
// is missing from this copy of the file — confirm against upstream.
5633 unsigned &Index) {
// Record which of the Factor lane positions is entirely undef across the
// whole mask.
5634 SmallVector<bool> LaneIsUndef(Factor, true);
5635 for (unsigned i = 0; i < Mask.size(); i++)
5636 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5637
// Exactly one lane position may be defined; it becomes the spread Index.
5638 bool Found = false;
5639 for (unsigned i = 0; i < Factor; i++) {
5640 if (LaneIsUndef[i])
5641 continue;
5642 if (Found)
5643 return false;
5644 Index = i;
5645 Found = true;
5646 }
5647 if (!Found)
5648 return false;
5649
// The defined lane must read the leading source elements in order.
5650 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5651 unsigned j = i * Factor + Index;
5652 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5653 return false;
5654 }
5655 return true;
5656 }
5657
// Lower one of the XRivosVizip zip/unzip operations (Opc) on Op0/Op1,
// handling fixed-length operands by round-tripping through container types
// and performing the operation on integer-typed vectors.
5658 static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5659 const SDLoc &DL, SelectionDAG &DAG,
5660 const RISCVSubtarget &Subtarget) {
5661 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5662 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5663 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5665
5666 MVT VT = Op0.getSimpleValueType();
// NOTE(review): the declaration of IntVT is missing from this copy of the
// file; it is presumably VT's integer-element equivalent — confirm upstream.
5668 Op0 = DAG.getBitcast(IntVT, Op0);
5669 Op1 = DAG.getBitcast(IntVT, Op1);
5670
5671 MVT ContainerVT = IntVT;
5672 if (VT.isFixedLengthVector()) {
5673 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5674 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5675 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5676 }
5677
// For a single-operand unzip wider than M1, operate on the two halves of
// Op0 at half the register-group size to reduce the operation's LMUL.
5678 MVT InnerVT = ContainerVT;
5679 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5680 if (Op1.isUndef() &&
5681 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5682 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5683 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5684 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5685 Subtarget.getXLenVT());
5686 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5687 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5688 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5689 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5690 }
5691
5692 SDValue Passthru = DAG.getUNDEF(InnerVT);
5693 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
// Widen back up, convert out of the container type, and restore the
// original (possibly FP) element type.
5694 if (InnerVT.bitsLT(ContainerVT))
5695 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5696 if (IntVT.isFixedLengthVector())
5697 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5698 Res = DAG.getBitcast(VT, Res);
5699 return Res;
5700 }
5701
5702 // Given a vector a, b, c, d return a vector Factor times longer
5703 // with Factor-1 undef's between elements. Ex:
5704 // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5705 // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5706 static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5707 const SDLoc &DL, SelectionDAG &DAG) {
5708
5709 MVT VT = V.getSimpleValueType();
5710 unsigned EltBits = VT.getScalarSizeInBits();
// NOTE(review): the declaration of EC (element count, used below) is missing
// from this copy of the file — confirm against upstream.
5712 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5713
// Zero-extend to Factor-times-wider elements, then shift the payload into
// lane position Index within each wide element.
5714 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5715
5716 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5717 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5718 // allow the SHL to fold away if Index is 0.
5719 if (Index != 0)
5720 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5721 DAG.getConstant(EltBits * Index, DL, WideVT));
5722 // Make sure to use original element type
// NOTE(review): the first line of the ResultVT declaration is missing from
// this copy of the file — confirm against upstream.
5724 EC.multiplyCoefficientBy(Factor));
5725 return DAG.getBitcast(ResultVT, Result);
5726 }
5727
5728 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5729 // to create an interleaved vector of <[vscale x] n*2 x ty>.
5730 // This requires that the size of ty is less than the subtarget's maximum ELEN.
// NOTE(review): the first line of the signature (taking EvenV and OddV) is
// missing from this copy of the file — confirm against upstream.
5732 const SDLoc &DL, SelectionDAG &DAG,
5733 const RISCVSubtarget &Subtarget) {
5734
5735 // FIXME: Not only does this optimize the code, it fixes some correctness
5736 // issues because MIR does not have freeze.
5737 if (EvenV.isUndef())
5738 return getWideningSpread(OddV, 2, 1, DL, DAG);
5739 if (OddV.isUndef())
5740 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5741
5742 MVT VecVT = EvenV.getSimpleValueType();
5743 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5744 // Convert fixed vectors to scalable if needed
5745 if (VecContainerVT.isFixedLengthVector()) {
5746 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5747 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5748 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5749 }
5750
5751 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5752
5753 // We're working with a vector of the same size as the resulting
5754 // interleaved vector, but with half the number of elements and
5755 // twice the SEW (Hence the restriction on not using the maximum
5756 // ELEN)
// NOTE(review): the first line of the WideVT declaration (the double-width
// integer element type) is missing from this copy of the file.
5757 MVT WideVT =
5759 VecVT.getVectorElementCount());
5760 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5761 if (WideContainerVT.isFixedLengthVector())
5762 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5763
5764 // Bitcast the input vectors to integers in case they are FP
5765 VecContainerVT = VecContainerVT.changeTypeToInteger();
5766 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5767 OddV = DAG.getBitcast(VecContainerVT, OddV);
5768
5769 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5770 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5771
5772 SDValue Interleaved;
5773 if (Subtarget.hasStdExtZvbb()) {
5774 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5775 SDValue OffsetVec =
5776 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5777 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5778 OffsetVec, Passthru, Mask, VL);
5779 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5780 Interleaved, EvenV, Passthru, Mask, VL);
5781 } else {
5782 // FIXME: We should freeze the odd vector here. We already handled the case
5783 // of provably undef/poison above.
5784
5785 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5786 // vwaddu.vv
5787 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5788 OddV, Passthru, Mask, VL);
5789
5790 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
5791 SDValue AllOnesVec = DAG.getSplatVector(
5792 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5793 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5794 OddV, AllOnesVec, Passthru, Mask, VL);
5795
5796 // Add the two together so we get
5797 // (OddV * 0xff...ff) + (OddV + EvenV)
5798 // = (OddV * 0x100...00) + EvenV
5799 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5800 // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
5801 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5802 Interleaved, OddsMul, Passthru, Mask, VL);
5803 }
5804
5805 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5806 MVT ResultContainerVT = MVT::getVectorVT(
5807 VecVT.getVectorElementType(), // Make sure to use original type
5808 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5809 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5810
5811 // Convert back to a fixed vector if needed
// NOTE(review): the continuation of the ResultVT declaration is missing from
// this copy of the file — confirm against upstream.
5812 MVT ResultVT =
5815 if (ResultVT.isFixedLengthVector())
5816 Interleaved =
5817 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5818
5819 return Interleaved;
5820 }
5821
5822 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
5823 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5825 SelectionDAG &DAG,
5826 const RISCVSubtarget &Subtarget) {
5827 SDLoc DL(SVN);
5828 MVT VT = SVN->getSimpleValueType(0);
5829 SDValue V = SVN->getOperand(0);
5830 unsigned NumElts = VT.getVectorNumElements();
5831
5832 assert(VT.getVectorElementType() == MVT::i1);
5833
// NOTE(review): the first line of this condition (presumably a reverse-mask
// check on SVN's mask) is missing from this copy of the file.
5835 SVN->getMask().size()) ||
5836 !SVN->getOperand(1).isUndef())
5837 return SDValue();
5838
// Choose a single-element integer vector wide enough to hold all the bits
// (at least 8), plus the matching i1 vector type for the bitcasts.
5839 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5840 EVT ViaVT = EVT::getVectorVT(
5841 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5842 EVT ViaBitVT =
5843 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5844
5845 // If we don't have zvbb or the larger element type > ELEN, the operation will
5846 // be illegal.
// NOTE(review): the first line of this legality check (presumably an
// isOperationLegalOrCustom query for BITREVERSE) is missing from this copy.
5848 ViaVT) ||
5849 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5850 return SDValue();
5851
5852 // If the bit vector doesn't fit exactly into the larger element type, we need
5853 // to insert it into the larger vector and then shift up the reversed bits
5854 // afterwards to get rid of the gap introduced.
5855 if (ViaEltSize > NumElts)
5856 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5857
5858 SDValue Res =
5859 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5860
5861 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5862 // element type.
5863 if (ViaEltSize > NumElts)
5864 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5865 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5866
5867 Res = DAG.getBitcast(ViaBitVT, Res);
5868
5869 if (ViaEltSize > NumElts)
5870 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5871 return Res;
5872 }
5873
5875 const RISCVSubtarget &Subtarget,
5876 MVT &RotateVT, unsigned &RotateAmt) {
5877 unsigned NumElts = VT.getVectorNumElements();
5878 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5879 unsigned NumSubElts;
5880 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5881 NumElts, NumSubElts, RotateAmt))
5882 return false;
5883 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5884 NumElts / NumSubElts);
5885
5886 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5887 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5888}
5889
5890// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5891// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5892// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5894 SelectionDAG &DAG,
5895 const RISCVSubtarget &Subtarget) {
5896 SDLoc DL(SVN);
5897
5898 EVT VT = SVN->getValueType(0);
5899 unsigned RotateAmt;
5900 MVT RotateVT;
5901 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5902 return SDValue();
5903
5904 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5905
5906 SDValue Rotate;
5907 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5908 // so canonicalize to vrev8.
5909 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5910 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5911 else
5912 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5913 DAG.getConstant(RotateAmt, DL, RotateVT));
5914
5915 return DAG.getBitcast(VT, Rotate);
5916}
5917
5918// If compiling with an exactly known VLEN, see if we can split a
5919// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
5922 SelectionDAG &DAG,
5923 const RISCVSubtarget &Subtarget) {
5924 SDLoc DL(SVN);
5925 MVT VT = SVN->getSimpleValueType(0);
5926 SDValue V1 = SVN->getOperand(0);
5927 SDValue V2 = SVN->getOperand(1);
5928 ArrayRef<int> Mask = SVN->getMask();
5929
5930 // If we don't know exact data layout, not much we can do. If this
5931 // is already m1 or smaller, no point in splitting further.
5932 const auto VLen = Subtarget.getRealVLen();
5933 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5934 return SDValue();
5935
5936 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5937 // expansion for.
5938 unsigned RotateAmt;
5939 MVT RotateVT;
5940 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5941 return SDValue();
5942
5943 MVT ElemVT = VT.getVectorElementType();
5944 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5945
5946 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5947 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5948 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5949 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5950 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5951 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5952 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5953 unsigned NumOfDestRegs = NumElts / NumOpElts;
5954 // The following semantically builds up a fixed length concat_vector
5955 // of the component shuffle_vectors. We eagerly lower to scalable here
5956 // to avoid DAG combining it back to a large shuffle_vector again.
5957 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5958 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5960 Operands;
5962 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5963 [&]() { Operands.emplace_back(); },
5964 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5965 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5966 SmallVector<int>(SrcSubMask));
5967 },
5968 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5969 if (NewReg)
5970 Operands.emplace_back();
5971 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5972 });
5973 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5974 // Note: check that we do not emit too many shuffles here to prevent code
5975 // size explosion.
5976 // TODO: investigate, if it can be improved by extra analysis of the masks to
5977 // check if the code is more profitable.
5978 unsigned NumShuffles = std::accumulate(
5979 Operands.begin(), Operands.end(), 0u,
5980 [&](unsigned N,
5981 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5982 if (Data.empty())
5983 return N;
5984 N += Data.size();
5985 for (const auto &P : Data) {
5986 unsigned Idx2 = std::get<1>(P);
5987 ArrayRef<int> Mask = std::get<2>(P);
5988 if (Idx2 != UINT_MAX)
5989 ++N;
5990 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5991 --N;
5992 }
5993 return N;
5994 });
5995 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5996 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5997 return SDValue();
5998 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5999 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
6000 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
6001 return SubVec;
6002 };
6003 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
6005 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
6006 return SubVec;
6007 };
6008 SDValue Vec = DAG.getUNDEF(ContainerVT);
6009 for (auto [I, Data] : enumerate(Operands)) {
6010 if (Data.empty())
6011 continue;
6013 for (unsigned I : seq<unsigned>(Data.size())) {
6014 const auto &[Idx1, Idx2, _] = Data[I];
6015 // If the shuffle contains permutation of odd number of elements,
6016 // Idx1 might be used already in the first iteration.
6017 //
6018 // Idx1 = shuffle Idx1, Idx2
6019 // Idx1 = shuffle Idx1, Idx3
6020 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
6021 if (!V)
6022 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
6023 (Idx1 % NumOfSrcRegs) * NumOpElts);
6024 if (Idx2 != UINT_MAX) {
6025 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
6026 if (!V)
6027 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
6028 (Idx2 % NumOfSrcRegs) * NumOpElts);
6029 }
6030 }
6031 SDValue V;
6032 for (const auto &[Idx1, Idx2, Mask] : Data) {
6033 SDValue V1 = Values.at(Idx1);
6034 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
6035 V = PerformShuffle(V1, V2, Mask);
6036 Values[Idx1] = V;
6037 }
6038
6039 unsigned InsertIdx = I * NumOpElts;
6040 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
6041 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
6042 }
6043 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
6044}
6045
6046// Matches a subset of compress masks with a contiguous prefix of output
6047// elements. This could be extended to allow gaps by deciding which
6048// source elements to spuriously demand.
6050 int Last = -1;
6051 bool SawUndef = false;
6052 for (const auto &[Idx, M] : enumerate(Mask)) {
6053 if (M == -1) {
6054 SawUndef = true;
6055 continue;
6056 }
6057 if (SawUndef)
6058 return false;
6059 if (Idx > (unsigned)M)
6060 return false;
6061 if (M <= Last)
6062 return false;
6063 Last = M;
6064 }
6065 return true;
6066}
6067
6068/// Given a shuffle where the indices are disjoint between the two sources,
6069/// e.g.:
6070///
6071/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
6072///
6073/// Merge the two sources into one and do a single source shuffle:
6074///
6075/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
6076/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
6077///
6078/// A vselect will either be merged into a masked instruction or be lowered as a
6079/// vmerge.vvm, which is cheaper than a vrgather.vv.
6081 SelectionDAG &DAG,
6082 const RISCVSubtarget &Subtarget) {
6083 MVT VT = SVN->getSimpleValueType(0);
6084 MVT XLenVT = Subtarget.getXLenVT();
6085 SDLoc DL(SVN);
6086
6087 const ArrayRef<int> Mask = SVN->getMask();
6088
6089 // Work out which source each lane will come from.
6090 SmallVector<int, 16> Srcs(Mask.size(), -1);
6091
6092 for (int Idx : Mask) {
6093 if (Idx == -1)
6094 continue;
6095 unsigned SrcIdx = Idx % Mask.size();
6096 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
6097 if (Srcs[SrcIdx] == -1)
6098 // Mark this source as using this lane.
6099 Srcs[SrcIdx] = Src;
6100 else if (Srcs[SrcIdx] != Src)
6101 // The other source is using this lane: not disjoint.
6102 return SDValue();
6103 }
6104
6105 SmallVector<SDValue> SelectMaskVals;
6106 for (int Lane : Srcs) {
6107 if (Lane == -1)
6108 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
6109 else
6110 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
6111 }
6112 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
6113 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
6114 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
6115 SVN->getOperand(0), SVN->getOperand(1));
6116
6117 // Move all indices relative to the first source.
6118 SmallVector<int> NewMask(Mask.size());
6119 for (unsigned I = 0; I < Mask.size(); I++) {
6120 if (Mask[I] == -1)
6121 NewMask[I] = -1;
6122 else
6123 NewMask[I] = Mask[I] % Mask.size();
6124 }
6125
6126 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
6127}
6128
6129/// Is this mask local (i.e. elements only move within their local span), and
6130/// repeating (that is, the same rearrangement is being done within each span)?
6131static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
6132 // Require a prefix from the original mask until the consumer code
6133 // is adjusted to rewrite the mask instead of just taking a prefix.
6134 for (auto [I, M] : enumerate(Mask)) {
6135 if (M == -1)
6136 continue;
6137 if ((M / Span) != (int)(I / Span))
6138 return false;
6139 int SpanIdx = I % Span;
6140 int Expected = M % Span;
6141 if (Mask[SpanIdx] != Expected)
6142 return false;
6143 }
6144 return true;
6145}
6146
6147/// Is this mask only using elements from the first span of the input?
6148static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
6149 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
6150}
6151
6152/// Return true for a mask which performs an arbitrary shuffle within the first
6153/// span, and then repeats that same result across all remaining spans. Note
6154/// that this doesn't check if all the inputs come from a single span!
6155static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
6156 // Require a prefix from the original mask until the consumer code
6157 // is adjusted to rewrite the mask instead of just taking a prefix.
6158 for (auto [I, M] : enumerate(Mask)) {
6159 if (M == -1)
6160 continue;
6161 int SpanIdx = I % Span;
6162 if (Mask[SpanIdx] != M)
6163 return false;
6164 }
6165 return true;
6166}
6167
6168/// Try to widen element type to get a new mask value for a better permutation
6169/// sequence. This doesn't try to inspect the widened mask for profitability;
6170/// we speculate the widened form is equal or better. This has the effect of
6171/// reducing mask constant sizes - allowing cheaper materialization sequences
6172/// - and index sequence sizes - reducing register pressure and materialization
6173/// cost, at the cost of (possibly) an extra VTYPE toggle.
6175 SDLoc DL(Op);
6176 MVT VT = Op.getSimpleValueType();
6177 MVT ScalarVT = VT.getVectorElementType();
6178 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
6179 SDValue V0 = Op.getOperand(0);
6180 SDValue V1 = Op.getOperand(1);
6181 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
6182
6183 // Avoid wasted work leading to isTypeLegal check failing below
6184 if (ElementSize > 32)
6185 return SDValue();
6186
6187 SmallVector<int, 8> NewMask;
6188 if (!widenShuffleMaskElts(Mask, NewMask))
6189 return SDValue();
6190
6191 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
6192 : MVT::getIntegerVT(ElementSize * 2);
6193 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
6194 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
6195 return SDValue();
6196 V0 = DAG.getBitcast(NewVT, V0);
6197 V1 = DAG.getBitcast(NewVT, V1);
6198 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
6199}
6200
6202 const RISCVSubtarget &Subtarget) {
6203 SDValue V1 = Op.getOperand(0);
6204 SDValue V2 = Op.getOperand(1);
6205 SDLoc DL(Op);
6206 MVT XLenVT = Subtarget.getXLenVT();
6207 MVT VT = Op.getSimpleValueType();
6208 unsigned NumElts = VT.getVectorNumElements();
6210
6211 if (VT.getVectorElementType() == MVT::i1) {
6212 // Lower to a vror.vi of a larger element type if possible before we promote
6213 // i1s to i8s.
6214 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6215 return V;
6216 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
6217 return V;
6218
6219 // Promote i1 shuffle to i8 shuffle.
6220 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
6221 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
6222 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
6223 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
6224 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
6225 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
6226 ISD::SETNE);
6227 }
6228
6229 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6230
6231 // Store the return value in a single variable instead of structured bindings
6232 // so that we can pass it to GetSlide below, which cannot capture structured
6233 // bindings until C++20.
6234 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6235 auto [TrueMask, VL] = TrueMaskVL;
6236
6237 if (SVN->isSplat()) {
6238 const int Lane = SVN->getSplatIndex();
6239 if (Lane >= 0) {
6240 MVT SVT = VT.getVectorElementType();
6241
6242 // Turn splatted vector load into a strided load with an X0 stride.
6243 SDValue V = V1;
6244 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
6245 // with undef.
6246 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
6247 int Offset = Lane;
6248 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
6249 int OpElements =
6250 V.getOperand(0).getSimpleValueType().getVectorNumElements();
6251 V = V.getOperand(Offset / OpElements);
6252 Offset %= OpElements;
6253 }
6254
6255 // We need to ensure the load isn't atomic or volatile.
6256 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
6257 auto *Ld = cast<LoadSDNode>(V);
6258 Offset *= SVT.getStoreSize();
6259 SDValue NewAddr = DAG.getMemBasePlusOffset(
6260 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
6261
6262 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
6263 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
6264 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6265 SDValue IntID =
6266 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
6267 SDValue Ops[] = {Ld->getChain(),
6268 IntID,
6269 DAG.getUNDEF(ContainerVT),
6270 NewAddr,
6271 DAG.getRegister(RISCV::X0, XLenVT),
6272 VL};
6273 SDValue NewLoad = DAG.getMemIntrinsicNode(
6274 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
6276 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
6277 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
6278 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
6279 }
6280
6281 MVT SplatVT = ContainerVT;
6282
6283 // f16 with zvfhmin and bf16 need to use an integer scalar load.
6284 if (SVT == MVT::bf16 ||
6285 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
6286 SVT = MVT::i16;
6287 SplatVT = ContainerVT.changeVectorElementType(SVT);
6288 }
6289
6290 // Otherwise use a scalar load and splat. This will give the best
6291 // opportunity to fold a splat into the operation. ISel can turn it into
6292 // the x0 strided load if we aren't able to fold away the select.
6293 if (SVT.isFloatingPoint())
6294 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
6295 Ld->getPointerInfo().getWithOffset(Offset),
6296 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6297 else
6298 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
6299 Ld->getPointerInfo().getWithOffset(Offset), SVT,
6300 Ld->getBaseAlign(),
6301 Ld->getMemOperand()->getFlags());
6303
6304 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
6305 : RISCVISD::VMV_V_X_VL;
6306 SDValue Splat =
6307 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
6308 Splat = DAG.getBitcast(ContainerVT, Splat);
6309 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
6310 }
6311
6312 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6313 assert(Lane < (int)NumElts && "Unexpected lane!");
6314 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
6315 V1, DAG.getConstant(Lane, DL, XLenVT),
6316 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6317 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6318 }
6319 }
6320
6321 // For exact VLEN m2 or greater, try to split to m1 operations if we
6322 // can split cleanly.
6323 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
6324 return V;
6325
6326 ArrayRef<int> Mask = SVN->getMask();
6327
6328 if (SDValue V =
6329 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
6330 return V;
6331
6332 if (SDValue V =
6333 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
6334 return V;
6335
6336 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
6337 // available.
6338 if (Subtarget.hasStdExtZvkb())
6339 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6340 return V;
6341
6342 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
6343 NumElts != 2)
6344 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
6345
6346 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
6347 // use shift and truncate to perform the shuffle.
6348 // TODO: For Factor=6, we can perform the first step of the deinterleave via
6349 // shift-and-trunc reducing total cost for everything except an mf8 result.
6350 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
6351 // to do the entire operation.
6352 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6353 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6354 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6355 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
6356 unsigned Index = 0;
6357 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
6358 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6359 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
6360 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
6361 if (1 < count_if(Mask,
6362 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6363 1 < count_if(Mask, [&Mask](int Idx) {
6364 return Idx >= (int)Mask.size();
6365 })) {
6366 // Narrow each source and concatenate them.
6367 // FIXME: For small LMUL it is better to concatenate first.
6368 MVT EltVT = VT.getVectorElementType();
6369 auto EltCnt = VT.getVectorElementCount();
6370 MVT SubVT =
6371 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
6372
6373 SDValue Lo =
6374 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
6375 SDValue Hi =
6376 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
6377
6378 SDValue Concat =
6381 if (Factor == 2)
6382 return Concat;
6383
6384 SDValue Vec = DAG.getUNDEF(VT);
6385 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6386 }
6387 }
6388 }
6389 }
6390
6391 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6392 // e64 which can't match above.
6393 unsigned Index = 0;
6394 if (Subtarget.hasVendorXRivosVizip() &&
6396 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6397 unsigned Opc =
6398 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6399 if (V2.isUndef())
6400 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6401 if (auto VLEN = Subtarget.getRealVLen();
6402 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6403 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6404 if (SDValue Src = foldConcatVector(V1, V2)) {
6405 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6406 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6407 SDValue Res =
6408 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6409 return DAG.getExtractSubvector(DL, VT, Res, 0);
6410 }
6411 // Deinterleave each source and concatenate them, or concat first, then
6412 // deinterleave.
6413 if (1 < count_if(Mask,
6414 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6415 1 < count_if(Mask,
6416 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6417
6418 const unsigned EltSize = VT.getScalarSizeInBits();
6419 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6420 if (NumElts < MinVLMAX) {
6421 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6422 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6423 SDValue Res =
6424 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6425 return DAG.getExtractSubvector(DL, VT, Res, 0);
6426 }
6427
6428 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6429 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6430
6431 MVT SubVT = VT.getHalfNumVectorElementsVT();
6432 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6433 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6434 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6435 }
6436 }
6437
6438 if (SDValue V =
6439 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6440 return V;
6441
6442 // Detect an interleave shuffle and lower to
6443 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6444 int EvenSrc, OddSrc;
6445 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6446 !(NumElts == 2 &&
6447 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6448 // Extract the halves of the vectors.
6449 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6450
6451 // Recognize if one half is actually undef; the matching above will
6452 // otherwise reuse the even stream for the undef one. This improves
6453 // spread(2) shuffles.
6454 bool LaneIsUndef[2] = { true, true};
6455 for (const auto &[Idx, M] : enumerate(Mask))
6456 LaneIsUndef[Idx % 2] &= (M == -1);
6457
6458 int Size = Mask.size();
6459 SDValue EvenV, OddV;
6460 if (LaneIsUndef[0]) {
6461 EvenV = DAG.getUNDEF(HalfVT);
6462 } else {
6463 assert(EvenSrc >= 0 && "Undef source?");
6464 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6465 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6466 }
6467
6468 if (LaneIsUndef[1]) {
6469 OddV = DAG.getUNDEF(HalfVT);
6470 } else {
6471 assert(OddSrc >= 0 && "Undef source?");
6472 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6473 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6474 }
6475
6476 // Prefer vzip2a if available.
6477 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6478 if (Subtarget.hasVendorXRivosVizip()) {
6479 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6480 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6481 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6482 }
6483 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6484 }
6485
6486 // Recognize a pattern which can handled via a pair of vslideup/vslidedown
6487 // instructions (in any combination) with masking on the second instruction.
6488 // Also handles masked slides into an identity source, and single slides
6489 // without masking. Avoid matching bit rotates (which are not also element
6490 // rotates) as slide pairs. This is a performance heuristic, not a
6491 // functional check.
6492 std::array<std::pair<int, int>, 2> SrcInfo;
6493 unsigned RotateAmt;
6494 MVT RotateVT;
6495 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6496 (isElementRotate(SrcInfo, NumElts) ||
6497 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6498 SDValue Sources[2];
6499 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6500 int SrcIdx = Info.first;
6501 assert(SrcIdx == 0 || SrcIdx == 1);
6502 SDValue &Src = Sources[SrcIdx];
6503 if (!Src) {
6504 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6505 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6506 }
6507 return Src;
6508 };
6509 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6510 SDValue Passthru) {
6511 auto [TrueMask, VL] = TrueMaskVL;
6512 SDValue SrcV = GetSourceFor(Src);
6513 int SlideAmt = Src.second;
6514 if (SlideAmt == 0) {
6515 // Should never be second operation
6516 assert(Mask == TrueMask);
6517 return SrcV;
6518 }
6519 if (SlideAmt < 0)
6520 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6521 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6523 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6524 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6526 };
6527
6528 if (SrcInfo[1].first == -1) {
6529 SDValue Res = DAG.getUNDEF(ContainerVT);
6530 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6531 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6532 }
6533
6534 if (Subtarget.hasVendorXRivosVizip()) {
6535 bool TryWiden = false;
6536 unsigned Factor;
6537 if (isZipEven(SrcInfo, Mask, Factor)) {
6538 if (Factor == 1) {
6539 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6540 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6541 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6542 Subtarget);
6543 }
6544 TryWiden = true;
6545 }
6546 if (isZipOdd(SrcInfo, Mask, Factor)) {
6547 if (Factor == 1) {
6548 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6549 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6550 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6551 Subtarget);
6552 }
6553 TryWiden = true;
6554 }
    // If we found a widening opportunity which would let us form a
6556 // zipeven or zipodd, use the generic code to widen the shuffle
6557 // and recurse through this logic.
6558 if (TryWiden)
6559 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6560 return V;
6561 }
6562
6563 // Build the mask. Note that vslideup unconditionally preserves elements
6564 // below the slide amount in the destination, and thus those elements are
6565 // undefined in the mask. If the mask ends up all true (or undef), it
6566 // will be folded away by general logic.
6567 SmallVector<SDValue> MaskVals;
6568 for (const auto &[Idx, M] : enumerate(Mask)) {
6569 if (M < 0 ||
6570 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6571 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6572 continue;
6573 }
6574 int Src = M >= (int)NumElts;
6575 int Diff = (int)Idx - (M % NumElts);
6576 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6577 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6578 "Must match exactly one of the two slides");
6579 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6580 }
6581 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6582 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6583 SDValue SelectMask = convertToScalableVector(
6584 ContainerVT.changeVectorElementType(MVT::i1),
6585 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6586
6587 SDValue Res = DAG.getUNDEF(ContainerVT);
6588 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6589 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6590 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6591 }
6592
6593 // Handle any remaining single source shuffles
6594 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6595 if (V2.isUndef()) {
6596 // We might be able to express the shuffle as a bitrotate. But even if we
6597 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6598 // shifts and a vor will have a higher throughput than a vrgather.
6599 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6600 return V;
6601
6602 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6603 return V;
6604
6605 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6606 // is fully covered in interleave(2) above, so it is ignored here.
6607 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6608 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6609 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6610 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6611 unsigned Index;
6612 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6613 MVT NarrowVT =
6614 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6615 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6616 return getWideningSpread(Src, Factor, Index, DL, DAG);
6617 }
6618 }
6619 }
6620
6621 // If only a prefix of the source elements influence a prefix of the
6622 // destination elements, try to see if we can reduce the required LMUL
6623 unsigned MinVLen = Subtarget.getRealMinVLen();
6624 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6625 if (NumElts > MinVLMAX) {
6626 unsigned MaxIdx = 0;
6627 for (auto [I, M] : enumerate(Mask)) {
6628 if (M == -1)
6629 continue;
6630 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6631 }
6632 unsigned NewNumElts =
6633 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6634 if (NewNumElts != NumElts) {
6635 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6636 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6637 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6638 Mask.take_front(NewNumElts));
6639 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6640 }
6641 }
6642
6643 // Before hitting generic lowering fallbacks, try to widen the mask
6644 // to a wider SEW.
6645 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6646 return V;
6647
6648 // Can we generate a vcompress instead of a vrgather? These scale better
6649 // at high LMUL, at the cost of not being able to fold a following select
6650 // into them. The mask constants are also smaller than the index vector
6651 // constants, and thus easier to materialize.
6652 if (isCompressMask(Mask)) {
6653 SmallVector<SDValue> MaskVals(NumElts,
6654 DAG.getConstant(false, DL, XLenVT));
6655 for (auto Idx : Mask) {
6656 if (Idx == -1)
6657 break;
6658 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6659 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6660 }
6661 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6662 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6663 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6664 DAG.getUNDEF(VT));
6665 }
6666
6667 if (VT.getScalarSizeInBits() == 8 &&
6668 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6669 // On such a vector we're unable to use i8 as the index type.
6670 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6671 // may involve vector splitting if we're already at LMUL=8, or our
6672 // user-supplied maximum fixed-length LMUL.
6673 return SDValue();
6674 }
6675
6676 // Base case for the two operand recursion below - handle the worst case
6677 // single source shuffle.
6678 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6679 MVT IndexVT = VT.changeTypeToInteger();
6680 // Since we can't introduce illegal index types at this stage, use i16 and
6681 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6682 // than XLenVT.
6683 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6684 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6685 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6686 }
6687
6688 // If the mask allows, we can do all the index computation in 16 bits. This
6689 // requires less work and less register pressure at high LMUL, and creates
6690 // smaller constants which may be cheaper to materialize.
6691 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6692 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6693 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6694 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6695 }
6696
6697 MVT IndexContainerVT =
6698 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6699
6700 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6701 SmallVector<SDValue> GatherIndicesLHS;
6702 for (int MaskIndex : Mask) {
6703 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6704 GatherIndicesLHS.push_back(IsLHSIndex
6705 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6706 : DAG.getUNDEF(XLenVT));
6707 }
6708 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6709 LHSIndices =
6710 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6711 // At m1 and less, there's no point trying any of the high LMUL splitting
6712 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6713 if (NumElts <= MinVLMAX) {
6714 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6715 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6716 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6717 }
6718
6719 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6720 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6721 auto [InnerTrueMask, InnerVL] =
6722 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6723 int N =
6724 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6725 assert(isPowerOf2_32(N) && N <= 8);
6726
6727 // If we have a locally repeating mask, then we can reuse the first
6728 // register in the index register group for all registers within the
6729 // source register group. TODO: This generalizes to m2, and m4.
6730 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6731 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6732 SDValue Gather = DAG.getUNDEF(ContainerVT);
6733 for (int i = 0; i < N; i++) {
6734 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6735 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6736 SDValue SubVec =
6737 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6738 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6739 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6740 }
6741 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6742 }
6743
6744 // If we have a shuffle which only uses the first register in our source
6745 // register group, and repeats the same index across all spans, we can
6746 // use a single vrgather (and possibly some register moves).
6747 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6748 // which we can do a linear number of shuffles to form an m1 which
6749 // contains all the output elements.
6750 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6751 isSpanSplatShuffle(Mask, MinVLMAX)) {
6752 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6753 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6754 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6755 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6756 SDValue Gather = DAG.getUNDEF(ContainerVT);
6757 for (int i = 0; i < N; i++)
6758 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6759 M1VT.getVectorMinNumElements() * i);
6760 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6761 }
6762
6763 // If we have a shuffle which only uses the first register in our
6764 // source register group, we can do a linear number of m1 vrgathers
6765 // reusing the same source register (but with different indices)
6766 // TODO: This can be generalized for m2 or m4, or for any shuffle
6767 // for which we can do a vslidedown followed by this expansion.
6768 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6769 SDValue SlideAmt =
6770 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6771 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6772 SDValue Gather = DAG.getUNDEF(ContainerVT);
6773 for (int i = 0; i < N; i++) {
6774 if (i != 0)
6775 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6776 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6777 SlideAmt, TrueMask, VL);
6778 SDValue SubIndex =
6779 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6780 SDValue SubVec =
6781 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6782 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6783 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6784 M1VT.getVectorMinNumElements() * i);
6785 }
6786 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6787 }
6788
6789 // Fallback to generic vrgather if we can't find anything better.
6790 // On many machines, this will be O(LMUL^2)
6791 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6792 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6793 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6794 }
6795
6796 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6797 // merged with a second vrgather.
6798 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6799
6800 // Now construct the mask that will be used by the blended vrgather operation.
6801 // Construct the appropriate indices into each vector.
6802 for (int MaskIndex : Mask) {
6803 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6804 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6805 ? MaskIndex : -1);
6806 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6807 }
6808
6809 // If the mask indices are disjoint between the two sources, we can lower it
6810 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6811 // operands may end up being lowered to something cheaper than a vrgather.vv.
6812 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6813 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6814 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6815 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6816 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6817 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6818 return V;
6819
6820 // Before hitting generic lowering fallbacks, try to widen the mask
6821 // to a wider SEW.
6822 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6823 return V;
6824
6825 // Try to pick a profitable operand order.
6826 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6827 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6828
6829 // Recursively invoke lowering for each operand if we had two
6830 // independent single source shuffles, and then combine the result via a
6831 // vselect. Note that the vselect will likely be folded back into the
6832 // second permute (vrgather, or other) by the post-isel combine.
6833 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6834 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6835
6836 SmallVector<SDValue> MaskVals;
6837 for (int MaskIndex : Mask) {
6838 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6839 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6840 }
6841
6842 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6843 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6844 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6845
6846 if (SwapOps)
6847 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6848 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6849}
6850
6852 // Only support legal VTs for other shuffles for now.
6853 if (!isTypeLegal(VT) || !Subtarget.hasVInstructions())
6854 return false;
6855
6856 // Support splats for any type. These should type legalize well.
6858 return true;
6859
6860 const unsigned NumElts = M.size();
6861 MVT SVT = VT.getSimpleVT();
6862
6863 // Not for i1 vectors.
6864 if (SVT.getScalarType() == MVT::i1)
6865 return false;
6866
6867 std::array<std::pair<int, int>, 2> SrcInfo;
6868 int Dummy1, Dummy2;
6869 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6870 (::isMaskedSlidePair(M, SrcInfo) &&
6871 isElementRotate(SrcInfo, NumElts)) ||
6872 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6873}
6874
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF (and their VP forms) by converting
// to FP and extracting the exponent: for a value with a single set bit, the
// biased FP exponent encodes log2 of that bit's position.
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);
  SDLoc DL(Op);
  MVT ContainerVT = VT;

  // For VP opcodes, the mask and EVL come in as explicit operands; for
  // non-VP opcodes they are synthesized later (only on the RTZ path below).
  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose FP type that can represent the value if possible. Otherwise, we
  // use rounding to zero conversion for correct exponent of the result.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    // VP variant of the same X & -X trick, threading the mask and EVL.
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    // The FP type is wide enough to represent the value exactly, so an
    // ordinary unsigned conversion suffices.
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    SDValue RTZRM =
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }
  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  // Mantissa width: 52 bits for f64, 23 for f32.
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals to Adjust which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}
6993
6994SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6995 SelectionDAG &DAG) const {
6996 SDLoc DL(Op);
6997 MVT XLenVT = Subtarget.getXLenVT();
6998 SDValue Source = Op->getOperand(0);
6999 MVT SrcVT = Source.getSimpleValueType();
7000 SDValue Mask = Op->getOperand(1);
7001 SDValue EVL = Op->getOperand(2);
7002
7003 if (SrcVT.isFixedLengthVector()) {
7004 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
7005 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
7006 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7007 Subtarget);
7008 SrcVT = ContainerVT;
7009 }
7010
7011 // Convert to boolean vector.
7012 if (SrcVT.getScalarType() != MVT::i1) {
7013 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
7014 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
7015 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
7016 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
7017 DAG.getUNDEF(SrcVT), Mask, EVL});
7018 }
7019
7020 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
7021 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
7022 // In this case, we can interpret poison as -1, so nothing to do further.
7023 return Res;
7024
7025 // Convert -1 to VL.
7026 SDValue SetCC =
7027 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
7028 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
7029 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
7030}
7031
7032// While RVV has alignment restrictions, we should always be able to load as a
7033// legal equivalently-sized byte-typed vector instead. This method is
7034// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
7035// the load is already correctly-aligned, it returns SDValue().
7036SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
7037 SelectionDAG &DAG) const {
7038 auto *Load = cast<LoadSDNode>(Op);
7039 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
7040
7042 Load->getMemoryVT(),
7043 *Load->getMemOperand()))
7044 return SDValue();
7045
7046 SDLoc DL(Op);
7047 MVT VT = Op.getSimpleValueType();
7048 unsigned EltSizeBits = VT.getScalarSizeInBits();
7049 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7050 "Unexpected unaligned RVV load type");
7051 MVT NewVT =
7052 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7053 assert(NewVT.isValid() &&
7054 "Expecting equally-sized RVV vector types to be legal");
7055 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
7056 Load->getPointerInfo(), Load->getBaseAlign(),
7057 Load->getMemOperand()->getFlags());
7058 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
7059}
7060
7061// While RVV has alignment restrictions, we should always be able to store as a
7062// legal equivalently-sized byte-typed vector instead. This method is
7063// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
7064// returns SDValue() if the store is already correctly aligned.
7065SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
7066 SelectionDAG &DAG) const {
7067 auto *Store = cast<StoreSDNode>(Op);
7068 assert(Store && Store->getValue().getValueType().isVector() &&
7069 "Expected vector store");
7070
7072 Store->getMemoryVT(),
7073 *Store->getMemOperand()))
7074 return SDValue();
7075
7076 SDLoc DL(Op);
7077 SDValue StoredVal = Store->getValue();
7078 MVT VT = StoredVal.getSimpleValueType();
7079 unsigned EltSizeBits = VT.getScalarSizeInBits();
7080 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7081 "Unexpected unaligned RVV store type");
7082 MVT NewVT =
7083 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7084 assert(NewVT.isValid() &&
7085 "Expecting equally-sized RVV vector types to be legal");
7086 StoredVal = DAG.getBitcast(NewVT, StoredVal);
7087 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7088 Store->getPointerInfo(), Store->getBaseAlign(),
7089 Store->getMemOperand()->getFlags());
7090}
7091
7092// While RVV has alignment restrictions, we should always be able to load as a
7093// legal equivalently-sized byte-typed vector instead. This method is
7094// responsible for re-expressing a ISD::VP_LOAD via a correctly-aligned type. If
7095// the load is already correctly-aligned, it returns SDValue().
7096SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
7097 SelectionDAG &DAG) const {
7098 auto *Load = cast<VPLoadSDNode>(Op);
7099 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
7100
7102 Load->getMemoryVT(),
7103 *Load->getMemOperand()))
7104 return SDValue();
7105
7106 SDValue Mask = Load->getMask();
7107
7108 // FIXME: Handled masked loads somehow.
7110 return SDValue();
7111
7112 SDLoc DL(Op);
7113 MVT VT = Op.getSimpleValueType();
7114 unsigned EltSizeBits = VT.getScalarSizeInBits();
7115 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7116 "Unexpected unaligned RVV load type");
7117 MVT NewVT =
7118 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7119 assert(NewVT.isValid() &&
7120 "Expecting equally-sized RVV vector types to be legal");
7121
7122 SDValue VL = Load->getVectorLength();
7123 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
7124 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
7125
7126 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7127 SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
7128 DAG.getAllOnesConstant(DL, MaskVT), VL,
7129 Load->getPointerInfo(), Load->getBaseAlign(),
7130 Load->getMemOperand()->getFlags(), AAMDNodes());
7131 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
7132}
7133
7134// While RVV has alignment restrictions, we should always be able to store as a
7135// legal equivalently-sized byte-typed vector instead. This method is
7136// responsible for re-expressing a ISD::VP STORE via a correctly-aligned type.
7137// It returns SDValue() if the store is already correctly aligned.
7138SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
7139 SelectionDAG &DAG) const {
7140 auto *Store = cast<VPStoreSDNode>(Op);
7141 assert(Store && Store->getValue().getValueType().isVector() &&
7142 "Expected vector store");
7143
7145 Store->getMemoryVT(),
7146 *Store->getMemOperand()))
7147 return SDValue();
7148
7149 SDValue Mask = Store->getMask();
7150
7151 // FIXME: Handled masked stores somehow.
7153 return SDValue();
7154
7155 SDLoc DL(Op);
7156 SDValue StoredVal = Store->getValue();
7157 MVT VT = StoredVal.getSimpleValueType();
7158 unsigned EltSizeBits = VT.getScalarSizeInBits();
7159 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7160 "Unexpected unaligned RVV store type");
7161 MVT NewVT =
7162 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7163 assert(NewVT.isValid() &&
7164 "Expecting equally-sized RVV vector types to be legal");
7165
7166 SDValue VL = Store->getVectorLength();
7167 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
7168 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
7169
7170 StoredVal = DAG.getBitcast(NewVT, StoredVal);
7171
7172 LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
7173 MachineFunction &MF = DAG.getMachineFunction();
7174 MachineMemOperand *MMO = MF.getMachineMemOperand(
7175 Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
7176 Store->getBaseAlign());
7177
7178 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7179 return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7180 DAG.getUNDEF(Store->getBasePtr().getValueType()),
7181 DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
7183}
7184
7186 const RISCVSubtarget &Subtarget) {
7187 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
7188
7189 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
7190
7191 // All simm32 constants should be handled by isel.
7192 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
7193 // this check redundant, but small immediates are common so this check
7194 // should have better compile time.
7195 if (isInt<32>(Imm))
7196 return Op;
7197
7198 // We only need to cost the immediate, if constant pool lowering is enabled.
7199 if (!Subtarget.useConstantPoolForLargeInts())
7200 return Op;
7201
7203 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
7204 return Op;
7205
7206 // Optimizations below are disabled for opt size. If we're optimizing for
7207 // size, use a constant pool.
7208 if (DAG.shouldOptForSize())
7209 return SDValue();
7210
7211 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
7212 // that if it will avoid a constant pool.
7213 // It will require an extra temporary register though.
7214 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
7215 // low and high 32 bits are the same and bit 31 and 63 are set.
7216 unsigned ShiftAmt, AddOpc;
7217 RISCVMatInt::InstSeq SeqLo =
7218 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
7219 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
7220 return Op;
7221
7222 return SDValue();
7223}
7224
7225SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
7226 SelectionDAG &DAG) const {
7227 MVT VT = Op.getSimpleValueType();
7228 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
7229
7230 // Can this constant be selected by a Zfa FLI instruction?
7231 bool Negate = false;
7232 int Index = getLegalZfaFPImm(Imm, VT);
7233
7234 // If the constant is negative, try negating.
7235 if (Index < 0 && Imm.isNegative()) {
7236 Index = getLegalZfaFPImm(-Imm, VT);
7237 Negate = true;
7238 }
7239
7240 // If we couldn't find a FLI lowering, fall back to generic code.
7241 if (Index < 0)
7242 return SDValue();
7243
7244 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
7245 SDLoc DL(Op);
7246 SDValue Const =
7247 DAG.getNode(RISCVISD::FLI, DL, VT,
7248 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
7249 if (!Negate)
7250 return Const;
7251
7252 return DAG.getNode(ISD::FNEG, DL, VT, Const);
7253}
7254
7256 SelectionDAG &DAG) {
7257
7258 unsigned IsData = Op.getConstantOperandVal(4);
7259
7260 // mips-p8700 we support data prefetch for now.
7261 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
7262 return Op.getOperand(0);
7263 return Op;
7264}
7265
7267 const RISCVSubtarget &Subtarget) {
7268 SDLoc dl(Op);
7269 AtomicOrdering FenceOrdering =
7270 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
7271 SyncScope::ID FenceSSID =
7272 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
7273
7274 if (Subtarget.hasStdExtZtso()) {
7275 // The only fence that needs an instruction is a sequentially-consistent
7276 // cross-thread fence.
7277 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
7278 FenceSSID == SyncScope::System)
7279 return Op;
7280
7281 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7282 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7283 }
7284
7285 // singlethread fences only synchronize with signal handlers on the same
7286 // thread and thus only need to preserve instruction order, not actually
7287 // enforce memory ordering.
7288 if (FenceSSID == SyncScope::SingleThread)
7289 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7290 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7291
7292 return Op;
7293}
7294
7295SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
7296 SelectionDAG &DAG) const {
7297 SDLoc DL(Op);
7298 MVT VT = Op.getSimpleValueType();
7299 MVT XLenVT = Subtarget.getXLenVT();
7300 unsigned Check = Op.getConstantOperandVal(1);
7301 unsigned TDCMask = 0;
7302 if (Check & fcSNan)
7303 TDCMask |= RISCV::FPMASK_Signaling_NaN;
7304 if (Check & fcQNan)
7305 TDCMask |= RISCV::FPMASK_Quiet_NaN;
7306 if (Check & fcPosInf)
7308 if (Check & fcNegInf)
7310 if (Check & fcPosNormal)
7312 if (Check & fcNegNormal)
7314 if (Check & fcPosSubnormal)
7316 if (Check & fcNegSubnormal)
7318 if (Check & fcPosZero)
7319 TDCMask |= RISCV::FPMASK_Positive_Zero;
7320 if (Check & fcNegZero)
7321 TDCMask |= RISCV::FPMASK_Negative_Zero;
7322
7323 bool IsOneBitMask = isPowerOf2_32(TDCMask);
7324
7325 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
7326
7327 if (VT.isVector()) {
7328 SDValue Op0 = Op.getOperand(0);
7329 MVT VT0 = Op.getOperand(0).getSimpleValueType();
7330
7331 if (VT.isScalableVector()) {
7332 MVT DstVT = VT0.changeVectorElementTypeToInteger();
7333 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
7334 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7335 Mask = Op.getOperand(2);
7336 VL = Op.getOperand(3);
7337 }
7338 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
7339 VL, Op->getFlags());
7340 if (IsOneBitMask)
7341 return DAG.getSetCC(DL, VT, FPCLASS,
7342 DAG.getConstant(TDCMask, DL, DstVT),
7344 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
7345 DAG.getConstant(TDCMask, DL, DstVT));
7346 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
7347 ISD::SETNE);
7348 }
7349
7350 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
7351 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7352 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
7353 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
7354 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7355 Mask = Op.getOperand(2);
7356 MVT MaskContainerVT =
7357 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7358 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7359 VL = Op.getOperand(3);
7360 }
7361 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
7362
7363 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
7364 Mask, VL, Op->getFlags());
7365
7366 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7367 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
7368 if (IsOneBitMask) {
7369 SDValue VMSEQ =
7370 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7371 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
7372 DAG.getUNDEF(ContainerVT), Mask, VL});
7373 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
7374 }
7375 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
7376 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
7377
7378 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7379 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7380 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
7381
7382 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7383 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
7384 DAG.getUNDEF(ContainerVT), Mask, VL});
7385 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
7386 }
7387
7388 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
7389 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
7390 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
7392 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7393}
7394
7395// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
7396// operations propagate nans.
7398 const RISCVSubtarget &Subtarget) {
7399 SDLoc DL(Op);
7400 MVT VT = Op.getSimpleValueType();
7401
7402 SDValue X = Op.getOperand(0);
7403 SDValue Y = Op.getOperand(1);
7404
7405 if (!VT.isVector()) {
7406 MVT XLenVT = Subtarget.getXLenVT();
7407
7408 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
7409 // ensures that when one input is a nan, the other will also be a nan
7410 // allowing the nan to propagate. If both inputs are nan, this will swap the
7411 // inputs which is harmless.
7412
7413 SDValue NewY = Y;
7414 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
7415 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
7416 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
7417 }
7418
7419 SDValue NewX = X;
7420 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
7421 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
7422 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
7423 }
7424
7425 unsigned Opc =
7426 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
7427 return DAG.getNode(Opc, DL, VT, NewX, NewY);
7428 }
7429
7430 // Check no NaNs before converting to fixed vector scalable.
7431 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
7432 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
7433
7434 MVT ContainerVT = VT;
7435 if (VT.isFixedLengthVector()) {
7436 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
7437 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
7438 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
7439 }
7440
7441 SDValue Mask, VL;
7442 if (Op->isVPOpcode()) {
7443 Mask = Op.getOperand(2);
7444 if (VT.isFixedLengthVector())
7445 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7446 Subtarget);
7447 VL = Op.getOperand(3);
7448 } else {
7449 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7450 }
7451
7452 SDValue NewY = Y;
7453 if (!XIsNeverNan) {
7454 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7455 {X, X, DAG.getCondCode(ISD::SETOEQ),
7456 DAG.getUNDEF(ContainerVT), Mask, VL});
7457 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
7458 DAG.getUNDEF(ContainerVT), VL);
7459 }
7460
7461 SDValue NewX = X;
7462 if (!YIsNeverNan) {
7463 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7464 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
7465 DAG.getUNDEF(ContainerVT), Mask, VL});
7466 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
7467 DAG.getUNDEF(ContainerVT), VL);
7468 }
7469
7470 unsigned Opc =
7471 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
7472 ? RISCVISD::VFMAX_VL
7473 : RISCVISD::VFMIN_VL;
7474 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
7475 DAG.getUNDEF(ContainerVT), Mask, VL);
7476 if (VT.isFixedLengthVector())
7477 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
7478 return Res;
7479}
7480
7482 const RISCVSubtarget &Subtarget) {
7483 bool IsFABS = Op.getOpcode() == ISD::FABS;
7484 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7485 "Wrong opcode for lowering FABS or FNEG.");
7486
7487 MVT XLenVT = Subtarget.getXLenVT();
7488 MVT VT = Op.getSimpleValueType();
7489 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7490
7491 SDLoc DL(Op);
7492 SDValue Fmv =
7493 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7494
7495 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7496 Mask = Mask.sext(Subtarget.getXLen());
7497
7498 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7499 SDValue Logic =
7500 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7501 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7502}
7503
7505 const RISCVSubtarget &Subtarget) {
7506 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7507
7508 MVT XLenVT = Subtarget.getXLenVT();
7509 MVT VT = Op.getSimpleValueType();
7510 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7511
7512 SDValue Mag = Op.getOperand(0);
7513 SDValue Sign = Op.getOperand(1);
7514
7515 SDLoc DL(Op);
7516
7517 // Get sign bit into an integer value.
7518 unsigned SignSize = Sign.getValueSizeInBits();
7519 SDValue SignAsInt = [&]() {
7520 if (SignSize == Subtarget.getXLen())
7521 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7522 switch (SignSize) {
7523 case 16:
7524 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7525 case 32:
7526 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7527 case 64: {
7528 assert(XLenVT == MVT::i32 && "Unexpected type");
7529 // Copy the upper word to integer.
7530 SignSize = 32;
7531 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7532 .getValue(1);
7533 }
7534 default:
7535 llvm_unreachable("Unexpected sign size");
7536 }
7537 }();
7538
7539 // Get the signbit at the right position for MagAsInt.
7540 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7541 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7542 SignAsInt,
7543 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7544
7545 // Mask the sign bit and any bits above it. The extra bits will be dropped
7546 // when we convert back to FP.
7547 SDValue SignMask = DAG.getConstant(
7548 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7549 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7550
7551 // Transform Mag value to integer, and clear the sign bit.
7552 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7553 SDValue ClearSignMask = DAG.getConstant(
7554 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7555 SDValue ClearedSign =
7556 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7557
7558 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7560
7561 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7562}
7563
7564/// Get a RISC-V target specified VL op for a given SDNode.
7565static unsigned getRISCVVLOp(SDValue Op) {
7566#define OP_CASE(NODE) \
7567 case ISD::NODE: \
7568 return RISCVISD::NODE##_VL;
7569#define VP_CASE(NODE) \
7570 case ISD::VP_##NODE: \
7571 return RISCVISD::NODE##_VL;
7572 // clang-format off
7573 switch (Op.getOpcode()) {
7574 default:
7575 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7576 OP_CASE(ADD)
7577 OP_CASE(SUB)
7578 OP_CASE(MUL)
7579 OP_CASE(MULHS)
7580 OP_CASE(MULHU)
7581 OP_CASE(SDIV)
7582 OP_CASE(SREM)
7583 OP_CASE(UDIV)
7584 OP_CASE(UREM)
7585 OP_CASE(SHL)
7586 OP_CASE(SRA)
7587 OP_CASE(SRL)
7588 OP_CASE(ROTL)
7589 OP_CASE(ROTR)
7590 OP_CASE(BSWAP)
7591 OP_CASE(CTTZ)
7592 OP_CASE(CTLZ)
7593 OP_CASE(CTPOP)
7594 OP_CASE(BITREVERSE)
7595 OP_CASE(SADDSAT)
7596 OP_CASE(UADDSAT)
7597 OP_CASE(SSUBSAT)
7598 OP_CASE(USUBSAT)
7599 OP_CASE(AVGFLOORS)
7600 OP_CASE(AVGFLOORU)
7601 OP_CASE(AVGCEILS)
7602 OP_CASE(AVGCEILU)
7603 OP_CASE(FADD)
7604 OP_CASE(FSUB)
7605 OP_CASE(FMUL)
7606 OP_CASE(FDIV)
7607 OP_CASE(FNEG)
7608 OP_CASE(FABS)
7609 OP_CASE(FCOPYSIGN)
7610 OP_CASE(FSQRT)
7611 OP_CASE(SMIN)
7612 OP_CASE(SMAX)
7613 OP_CASE(UMIN)
7614 OP_CASE(UMAX)
7615 OP_CASE(ABDS)
7616 OP_CASE(ABDU)
7617 OP_CASE(STRICT_FADD)
7618 OP_CASE(STRICT_FSUB)
7619 OP_CASE(STRICT_FMUL)
7620 OP_CASE(STRICT_FDIV)
7621 OP_CASE(STRICT_FSQRT)
7622 VP_CASE(ADD) // VP_ADD
7623 VP_CASE(SUB) // VP_SUB
7624 VP_CASE(MUL) // VP_MUL
7625 VP_CASE(SDIV) // VP_SDIV
7626 VP_CASE(SREM) // VP_SREM
7627 VP_CASE(UDIV) // VP_UDIV
7628 VP_CASE(UREM) // VP_UREM
7629 VP_CASE(SHL) // VP_SHL
7630 VP_CASE(FADD) // VP_FADD
7631 VP_CASE(FSUB) // VP_FSUB
7632 VP_CASE(FMUL) // VP_FMUL
7633 VP_CASE(FDIV) // VP_FDIV
7634 VP_CASE(FNEG) // VP_FNEG
7635 VP_CASE(FABS) // VP_FABS
7636 VP_CASE(SMIN) // VP_SMIN
7637 VP_CASE(SMAX) // VP_SMAX
7638 VP_CASE(UMIN) // VP_UMIN
7639 VP_CASE(UMAX) // VP_UMAX
7640 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7641 VP_CASE(SETCC) // VP_SETCC
7642 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7643 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7644 VP_CASE(BITREVERSE) // VP_BITREVERSE
7645 VP_CASE(SADDSAT) // VP_SADDSAT
7646 VP_CASE(UADDSAT) // VP_UADDSAT
7647 VP_CASE(SSUBSAT) // VP_SSUBSAT
7648 VP_CASE(USUBSAT) // VP_USUBSAT
7649 VP_CASE(BSWAP) // VP_BSWAP
7650 VP_CASE(CTLZ) // VP_CTLZ
7651 VP_CASE(CTTZ) // VP_CTTZ
7652 VP_CASE(CTPOP) // VP_CTPOP
7654 case ISD::VP_CTLZ_ZERO_UNDEF:
7655 return RISCVISD::CTLZ_VL;
7657 case ISD::VP_CTTZ_ZERO_UNDEF:
7658 return RISCVISD::CTTZ_VL;
7659 case ISD::FMA:
7660 case ISD::VP_FMA:
7661 return RISCVISD::VFMADD_VL;
7662 case ISD::STRICT_FMA:
7663 return RISCVISD::STRICT_VFMADD_VL;
7664 case ISD::AND:
7665 case ISD::VP_AND:
7666 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7667 return RISCVISD::VMAND_VL;
7668 return RISCVISD::AND_VL;
7669 case ISD::OR:
7670 case ISD::VP_OR:
7671 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7672 return RISCVISD::VMOR_VL;
7673 return RISCVISD::OR_VL;
7674 case ISD::XOR:
7675 case ISD::VP_XOR:
7676 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7677 return RISCVISD::VMXOR_VL;
7678 return RISCVISD::XOR_VL;
7679 case ISD::ANY_EXTEND:
7680 case ISD::ZERO_EXTEND:
7681 return RISCVISD::VZEXT_VL;
7682 case ISD::SIGN_EXTEND:
7683 return RISCVISD::VSEXT_VL;
7684 case ISD::SETCC:
7685 return RISCVISD::SETCC_VL;
7686 case ISD::VSELECT:
7687 return RISCVISD::VMERGE_VL;
7688 case ISD::VP_SELECT:
7689 case ISD::VP_MERGE:
7690 return RISCVISD::VMERGE_VL;
7691 case ISD::VP_SRA:
7692 return RISCVISD::SRA_VL;
7693 case ISD::VP_SRL:
7694 return RISCVISD::SRL_VL;
7695 case ISD::VP_SQRT:
7696 return RISCVISD::FSQRT_VL;
7697 case ISD::VP_SIGN_EXTEND:
7698 return RISCVISD::VSEXT_VL;
7699 case ISD::VP_ZERO_EXTEND:
7700 return RISCVISD::VZEXT_VL;
7701 case ISD::VP_FP_TO_SINT:
7702 return RISCVISD::VFCVT_RTZ_X_F_VL;
7703 case ISD::VP_FP_TO_UINT:
7704 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7705 case ISD::FMINNUM:
7706 case ISD::FMINIMUMNUM:
7707 case ISD::VP_FMINNUM:
7708 return RISCVISD::VFMIN_VL;
7709 case ISD::FMAXNUM:
7710 case ISD::FMAXIMUMNUM:
7711 case ISD::VP_FMAXNUM:
7712 return RISCVISD::VFMAX_VL;
7713 case ISD::LRINT:
7714 case ISD::VP_LRINT:
7715 case ISD::LLRINT:
7716 case ISD::VP_LLRINT:
7717 return RISCVISD::VFCVT_RM_X_F_VL;
7718 }
7719 // clang-format on
7720#undef OP_CASE
7721#undef VP_CASE
7722}
7723
7725 const RISCVSubtarget &Subtarget) {
7726 return (Op.getValueType() == MVT::nxv32f16 &&
7727 (Subtarget.hasVInstructionsF16Minimal() &&
7728 !Subtarget.hasVInstructionsF16())) ||
7729 (Op.getValueType() == MVT::nxv32bf16 &&
7730 Subtarget.hasVInstructionsBF16Minimal() &&
7731 (!Subtarget.hasVInstructionsBF16() ||
7732 (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
7733 !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
7734}
7735
7737 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7738 SDLoc DL(Op);
7739
7740 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7741 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7742
7743 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7744 if (!Op.getOperand(j).getValueType().isVector()) {
7745 LoOperands[j] = Op.getOperand(j);
7746 HiOperands[j] = Op.getOperand(j);
7747 continue;
7748 }
7749 std::tie(LoOperands[j], HiOperands[j]) =
7750 DAG.SplitVector(Op.getOperand(j), DL);
7751 }
7752
7753 SDValue LoRes =
7754 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7755 SDValue HiRes =
7756 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7757
7758 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7759}
7760
7762 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7763 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7764 SDLoc DL(Op);
7765
7766 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7767 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7768
7769 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7770 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7771 std::tie(LoOperands[j], HiOperands[j]) =
7772 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7773 continue;
7774 }
7775 if (!Op.getOperand(j).getValueType().isVector()) {
7776 LoOperands[j] = Op.getOperand(j);
7777 HiOperands[j] = Op.getOperand(j);
7778 continue;
7779 }
7780 std::tie(LoOperands[j], HiOperands[j]) =
7781 DAG.SplitVector(Op.getOperand(j), DL);
7782 }
7783
7784 SDValue LoRes =
7785 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7786 SDValue HiRes =
7787 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7788
7789 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7790}
7791
7793 SDLoc DL(Op);
7794
7795 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7796 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7797 auto [EVLLo, EVLHi] =
7798 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7799
7800 SDValue ResLo =
7801 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7802 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7803 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7804 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7805}
7806
7808
7809 assert(Op->isStrictFPOpcode());
7810
7811 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7812
7813 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7814 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7815
7816 SDLoc DL(Op);
7817
7818 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7819 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7820
7821 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7822 if (!Op.getOperand(j).getValueType().isVector()) {
7823 LoOperands[j] = Op.getOperand(j);
7824 HiOperands[j] = Op.getOperand(j);
7825 continue;
7826 }
7827 std::tie(LoOperands[j], HiOperands[j]) =
7828 DAG.SplitVector(Op.getOperand(j), DL);
7829 }
7830
7831 SDValue LoRes =
7832 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7833 HiOperands[0] = LoRes.getValue(1);
7834 SDValue HiRes =
7835 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7836
7837 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7838 LoRes.getValue(0), HiRes.getValue(0));
7839 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7840}
7841
7842SDValue
7843RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7844 SelectionDAG &DAG) const {
7845 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7846 "Unexpected bfloat16 load lowering");
7847
7848 SDLoc DL(Op);
7849 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7850 EVT MemVT = LD->getMemoryVT();
7851 SDValue Load = DAG.getExtLoad(
7852 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7853 LD->getBasePtr(),
7855 LD->getMemOperand());
7856 // Using mask to make bf16 nan-boxing valid when we don't have flh
7857 // instruction. -65536 would be treat as a small number and thus it can be
7858 // directly used lui to get the constant.
7859 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7860 SDValue OrSixteenOne =
7861 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7862 SDValue ConvertedResult =
7863 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7864 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7865}
7866
7867SDValue
7868RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7869 SelectionDAG &DAG) const {
7870 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7871 "Unexpected bfloat16 store lowering");
7872
7873 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7874 SDLoc DL(Op);
7875 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7876 Subtarget.getXLenVT(), ST->getValue());
7877 return DAG.getTruncStore(
7878 ST->getChain(), DL, FMV, ST->getBasePtr(),
7879 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7880 ST->getMemOperand());
7881}
7882
7884 SelectionDAG &DAG) const {
7885 switch (Op.getOpcode()) {
7886 default:
7888 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7889 case ISD::PREFETCH:
7890 return LowerPREFETCH(Op, Subtarget, DAG);
7891 case ISD::ATOMIC_FENCE:
7892 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7893 case ISD::GlobalAddress:
7894 return lowerGlobalAddress(Op, DAG);
7895 case ISD::BlockAddress:
7896 return lowerBlockAddress(Op, DAG);
7897 case ISD::ConstantPool:
7898 return lowerConstantPool(Op, DAG);
7899 case ISD::JumpTable:
7900 return lowerJumpTable(Op, DAG);
7902 return lowerGlobalTLSAddress(Op, DAG);
7903 case ISD::Constant:
7904 return lowerConstant(Op, DAG, Subtarget);
7905 case ISD::ConstantFP:
7906 return lowerConstantFP(Op, DAG);
7907 case ISD::SELECT:
7908 return lowerSELECT(Op, DAG);
7909 case ISD::BRCOND:
7910 return lowerBRCOND(Op, DAG);
7911 case ISD::VASTART:
7912 return lowerVASTART(Op, DAG);
7913 case ISD::FRAMEADDR:
7914 return lowerFRAMEADDR(Op, DAG);
7915 case ISD::RETURNADDR:
7916 return lowerRETURNADDR(Op, DAG);
7917 case ISD::SHL_PARTS:
7918 return lowerShiftLeftParts(Op, DAG);
7919 case ISD::SRA_PARTS:
7920 return lowerShiftRightParts(Op, DAG, true);
7921 case ISD::SRL_PARTS:
7922 return lowerShiftRightParts(Op, DAG, false);
7923 case ISD::ROTL:
7924 case ISD::ROTR:
7925 if (Op.getValueType().isFixedLengthVector()) {
7926 assert(Subtarget.hasStdExtZvkb());
7927 return lowerToScalableOp(Op, DAG);
7928 }
7929 assert(Subtarget.hasVendorXTHeadBb() &&
7930 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7931 "Unexpected custom legalization");
7932 // XTHeadBb only supports rotate by constant.
7933 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7934 return SDValue();
7935 return Op;
7936 case ISD::BITCAST: {
7937 SDLoc DL(Op);
7938 EVT VT = Op.getValueType();
7939 SDValue Op0 = Op.getOperand(0);
7940 EVT Op0VT = Op0.getValueType();
7941 MVT XLenVT = Subtarget.getXLenVT();
7942 if (Op0VT == MVT::i16 &&
7943 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7944 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7945 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7946 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7947 }
7948 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7949 Subtarget.hasStdExtFOrZfinx()) {
7950 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7951 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7952 }
7953 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7954 Subtarget.hasStdExtDOrZdinx()) {
7955 SDValue Lo, Hi;
7956 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7957 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7958 }
7959
7960 if (Subtarget.hasStdExtP()) {
7961 bool Is32BitCast =
7962 (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
7963 (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
7964 bool Is64BitCast =
7965 (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
7966 Op0VT == MVT::v2i32)) ||
7967 (Op0VT == MVT::i64 &&
7968 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
7969 if (Is32BitCast || Is64BitCast)
7970 return Op;
7971 }
7972
7973 // Consider other scalar<->scalar casts as legal if the types are legal.
7974 // Otherwise expand them.
7975 if (!VT.isVector() && !Op0VT.isVector()) {
7976 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7977 return Op;
7978 return SDValue();
7979 }
7980
7981 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7982 "Unexpected types");
7983
7984 if (VT.isFixedLengthVector()) {
7985 // We can handle fixed length vector bitcasts with a simple replacement
7986 // in isel.
7987 if (Op0VT.isFixedLengthVector())
7988 return Op;
7989 // When bitcasting from scalar to fixed-length vector, insert the scalar
7990 // into a one-element vector of the result type, and perform a vector
7991 // bitcast.
7992 if (!Op0VT.isVector()) {
7993 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7994 if (!isTypeLegal(BVT))
7995 return SDValue();
7996 return DAG.getBitcast(
7997 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7998 }
7999 return SDValue();
8000 }
8001 // Custom-legalize bitcasts from fixed-length vector types to scalar types
8002 // thus: bitcast the vector to a one-element vector type whose element type
8003 // is the same as the result type, and extract the first element.
8004 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
8005 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
8006 if (!isTypeLegal(BVT))
8007 return SDValue();
8008 SDValue BVec = DAG.getBitcast(BVT, Op0);
8009 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
8010 }
8011 return SDValue();
8012 }
8014 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8016 return LowerINTRINSIC_W_CHAIN(Op, DAG);
8018 return LowerINTRINSIC_VOID(Op, DAG);
8019 case ISD::IS_FPCLASS:
8020 return LowerIS_FPCLASS(Op, DAG);
8021 case ISD::BITREVERSE: {
8022 MVT VT = Op.getSimpleValueType();
8023 if (VT.isFixedLengthVector()) {
8024 assert(Subtarget.hasStdExtZvbb());
8025 return lowerToScalableOp(Op, DAG);
8026 }
8027 SDLoc DL(Op);
8028 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
8029 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
8030 // Expand bitreverse to a bswap(rev8) followed by brev8.
8031 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
8032 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
8033 }
8034 case ISD::TRUNCATE:
8037 // Only custom-lower vector truncates
8038 if (!Op.getSimpleValueType().isVector())
8039 return Op;
8040 return lowerVectorTruncLike(Op, DAG);
8041 case ISD::ANY_EXTEND:
8042 case ISD::ZERO_EXTEND:
8043 if (Op.getOperand(0).getValueType().isVector() &&
8044 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8045 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
8046 if (Op.getValueType().isScalableVector())
8047 return Op;
8048 return lowerToScalableOp(Op, DAG);
8049 case ISD::SIGN_EXTEND:
8050 if (Op.getOperand(0).getValueType().isVector() &&
8051 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8052 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
8053 if (Op.getValueType().isScalableVector())
8054 return Op;
8055 return lowerToScalableOp(Op, DAG);
8057 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
8059 return lowerINSERT_VECTOR_ELT(Op, DAG);
8061 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
8062 case ISD::SCALAR_TO_VECTOR: {
8063 MVT VT = Op.getSimpleValueType();
8064 SDLoc DL(Op);
8065 SDValue Scalar = Op.getOperand(0);
8066 if (VT.getVectorElementType() == MVT::i1) {
8067 MVT WideVT = VT.changeVectorElementType(MVT::i8);
8068 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
8069 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
8070 }
8071 MVT ContainerVT = VT;
8072 if (VT.isFixedLengthVector())
8073 ContainerVT = getContainerForFixedLengthVector(VT);
8074 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8075
8076 SDValue V;
8077 if (VT.isFloatingPoint()) {
8078 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
8079 DAG.getUNDEF(ContainerVT), Scalar, VL);
8080 } else {
8081 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
8082 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
8083 DAG.getUNDEF(ContainerVT), Scalar, VL);
8084 }
8085 if (VT.isFixedLengthVector())
8086 V = convertFromScalableVector(VT, V, DAG, Subtarget);
8087 return V;
8088 }
8089 case ISD::VSCALE: {
8090 MVT XLenVT = Subtarget.getXLenVT();
8091 MVT VT = Op.getSimpleValueType();
8092 SDLoc DL(Op);
8093 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8094 // We define our scalable vector types for lmul=1 to use a 64 bit known
8095 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
8096 // vscale as VLENB / 8.
8097 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
8098 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
8099 reportFatalInternalError("Support for VLEN==32 is incomplete.");
8100 // We assume VLENB is a multiple of 8. We manually choose the best shift
8101 // here because SimplifyDemandedBits isn't always able to simplify it.
8102 uint64_t Val = Op.getConstantOperandVal(0);
8103 if (isPowerOf2_64(Val)) {
8104 uint64_t Log2 = Log2_64(Val);
8105 if (Log2 < 3) {
8106 SDNodeFlags Flags;
8107 Flags.setExact(true);
8108 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
8109 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
8110 } else if (Log2 > 3) {
8111 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
8112 DAG.getConstant(Log2 - 3, DL, XLenVT));
8113 }
8114 } else if ((Val % 8) == 0) {
8115 // If the multiplier is a multiple of 8, scale it down to avoid needing
8116 // to shift the VLENB value.
8117 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
8118 DAG.getConstant(Val / 8, DL, XLenVT));
8119 } else {
8120 SDNodeFlags Flags;
8121 Flags.setExact(true);
8122 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
8123 DAG.getConstant(3, DL, XLenVT), Flags);
8124 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
8125 DAG.getConstant(Val, DL, XLenVT));
8126 }
8127 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
8128 }
8129 case ISD::FPOWI: {
8130 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
8131 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
8132 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
8133 Op.getOperand(1).getValueType() == MVT::i32) {
8134 SDLoc DL(Op);
8135 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8136 SDValue Powi =
8137 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
8138 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
8139 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8140 }
8141 return SDValue();
8142 }
8143 case ISD::FMAXIMUM:
8144 case ISD::FMINIMUM:
8145 if (isPromotedOpNeedingSplit(Op, Subtarget))
8146 return SplitVectorOp(Op, DAG);
8147 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8148 case ISD::FP_EXTEND:
8149 case ISD::FP_ROUND:
8150 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8153 return lowerStrictFPExtendOrRoundLike(Op, DAG);
8154 case ISD::SINT_TO_FP:
8155 case ISD::UINT_TO_FP:
8156 if (Op.getValueType().isVector() &&
8157 ((Op.getValueType().getScalarType() == MVT::f16 &&
8158 (Subtarget.hasVInstructionsF16Minimal() &&
8159 !Subtarget.hasVInstructionsF16())) ||
8160 Op.getValueType().getScalarType() == MVT::bf16)) {
8161 if (isPromotedOpNeedingSplit(Op, Subtarget))
8162 return SplitVectorOp(Op, DAG);
8163 // int -> f32
8164 SDLoc DL(Op);
8165 MVT NVT =
8166 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8167 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8168 // f32 -> [b]f16
8169 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8170 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8171 }
8172 [[fallthrough]];
8173 case ISD::FP_TO_SINT:
8174 case ISD::FP_TO_UINT:
8175 if (SDValue Op1 = Op.getOperand(0);
8176 Op1.getValueType().isVector() &&
8177 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8178 (Subtarget.hasVInstructionsF16Minimal() &&
8179 !Subtarget.hasVInstructionsF16())) ||
8180 Op1.getValueType().getScalarType() == MVT::bf16)) {
8181 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8182 return SplitVectorOp(Op, DAG);
8183 // [b]f16 -> f32
8184 SDLoc DL(Op);
8185 MVT NVT = MVT::getVectorVT(MVT::f32,
8186 Op1.getValueType().getVectorElementCount());
8187 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8188 // f32 -> int
8189 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
8190 }
8191 [[fallthrough]];
8196 // RVV can only do fp<->int conversions to types half/double the size as
8197 // the source. We custom-lower any conversions that do two hops into
8198 // sequences.
8199 MVT VT = Op.getSimpleValueType();
8200 if (VT.isScalarInteger())
8201 return lowerFP_TO_INT(Op, DAG, Subtarget);
8202 bool IsStrict = Op->isStrictFPOpcode();
8203 SDValue Src = Op.getOperand(0 + IsStrict);
8204 MVT SrcVT = Src.getSimpleValueType();
8205 if (SrcVT.isScalarInteger())
8206 return lowerINT_TO_FP(Op, DAG, Subtarget);
8207 if (!VT.isVector())
8208 return Op;
8209 SDLoc DL(Op);
8210 MVT EltVT = VT.getVectorElementType();
8211 MVT SrcEltVT = SrcVT.getVectorElementType();
8212 unsigned EltSize = EltVT.getSizeInBits();
8213 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
8214 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
8215 "Unexpected vector element types");
8216
8217 bool IsInt2FP = SrcEltVT.isInteger();
8218 // Widening conversions
8219 if (EltSize > (2 * SrcEltSize)) {
8220 if (IsInt2FP) {
8221 // Do a regular integer sign/zero extension then convert to float.
8222 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
8224 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
8225 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
8228 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
8229 if (IsStrict)
8230 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
8231 Op.getOperand(0), Ext);
8232 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
8233 }
8234 // FP2Int
8235 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
8236 // Do one doubling fp_extend then complete the operation by converting
8237 // to int.
8238 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8239 if (IsStrict) {
8240 auto [FExt, Chain] =
8241 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
8242 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
8243 }
8244 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
8245 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
8246 }
8247
8248 // Narrowing conversions
8249 if (SrcEltSize > (2 * EltSize)) {
8250 if (IsInt2FP) {
8251 // One narrowing int_to_fp, then an fp_round.
8252 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
8253 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8254 if (IsStrict) {
8255 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
8256 DAG.getVTList(InterimFVT, MVT::Other),
8257 Op.getOperand(0), Src);
8258 SDValue Chain = Int2FP.getValue(1);
8259 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
8260 }
8261 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
8262 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
8263 }
8264 // FP2Int
8265 // One narrowing fp_to_int, then truncate the integer. If the float isn't
8266 // representable by the integer, the result is poison.
8267 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
8269 if (IsStrict) {
8270 SDValue FP2Int =
8271 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
8272 Op.getOperand(0), Src);
8273 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8274 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
8275 }
8276 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
8277 if (EltSize == 1)
8278 // The integer should be 0 or 1/-1, so compare the integer result to 0.
8279 return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, IVecVT), FP2Int,
8280 ISD::SETNE);
8281 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8282 }
8283
8284 // Scalable vectors can exit here. Patterns will handle equally-sized
8285 // conversions halving/doubling ones.
8286 if (!VT.isFixedLengthVector())
8287 return Op;
8288
8289 // For fixed-length vectors we lower to a custom "VL" node.
8290 unsigned RVVOpc = 0;
8291 switch (Op.getOpcode()) {
8292 default:
8293 llvm_unreachable("Impossible opcode");
8294 case ISD::FP_TO_SINT:
8295 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
8296 break;
8297 case ISD::FP_TO_UINT:
8298 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
8299 break;
8300 case ISD::SINT_TO_FP:
8301 RVVOpc = RISCVISD::SINT_TO_FP_VL;
8302 break;
8303 case ISD::UINT_TO_FP:
8304 RVVOpc = RISCVISD::UINT_TO_FP_VL;
8305 break;
8307 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
8308 break;
8310 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
8311 break;
8313 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
8314 break;
8316 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
8317 break;
8318 }
8319
8320 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8321 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8322 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
8323 "Expected same element count");
8324
8325 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8326
8327 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8328 if (IsStrict) {
8329 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8330 Op.getOperand(0), Src, Mask, VL);
8331 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
8332 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
8333 }
8334 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
8335 return convertFromScalableVector(VT, Src, DAG, Subtarget);
8336 }
8339 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
8340 case ISD::FP_TO_BF16: {
8341 // Custom lower to ensure the libcall return is passed in an FPR on hard
8342 // float ABIs.
8343 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
8344 SDLoc DL(Op);
8345 MakeLibCallOptions CallOptions;
8346 RTLIB::Libcall LC =
8347 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
8348 SDValue Res =
8349 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
8350 if (Subtarget.is64Bit())
8351 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8352 return DAG.getBitcast(MVT::i32, Res);
8353 }
8354 case ISD::BF16_TO_FP: {
8355 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
8356 MVT VT = Op.getSimpleValueType();
8357 SDLoc DL(Op);
8358 Op = DAG.getNode(
8359 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
8360 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
8361 SDValue Res = Subtarget.is64Bit()
8362 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
8363 : DAG.getBitcast(MVT::f32, Op);
8364 // fp_extend if the target VT is bigger than f32.
8365 if (VT != MVT::f32)
8366 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
8367 return Res;
8368 }
8370 case ISD::FP_TO_FP16: {
8371 // Custom lower to ensure the libcall return is passed in an FPR on hard
8372 // float ABIs.
8373 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8374 SDLoc DL(Op);
8375 MakeLibCallOptions CallOptions;
8376 bool IsStrict = Op->isStrictFPOpcode();
8377 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8378 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8379 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
8380 SDValue Res;
8381 std::tie(Res, Chain) =
8382 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
8383 if (Subtarget.is64Bit())
8384 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8385 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
8386 if (IsStrict)
8387 return DAG.getMergeValues({Result, Chain}, DL);
8388 return Result;
8389 }
8391 case ISD::FP16_TO_FP: {
8392 // Custom lower to ensure the libcall argument is passed in an FPR on hard
8393 // float ABIs.
8394 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8395 SDLoc DL(Op);
8396 MakeLibCallOptions CallOptions;
8397 bool IsStrict = Op->isStrictFPOpcode();
8398 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8399 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8400 SDValue Arg = Subtarget.is64Bit()
8401 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
8402 : DAG.getBitcast(MVT::f32, Op0);
8403 SDValue Res;
8404 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
8405 CallOptions, DL, Chain);
8406 if (IsStrict)
8407 return DAG.getMergeValues({Res, Chain}, DL);
8408 return Res;
8409 }
8410 case ISD::FTRUNC:
8411 case ISD::FCEIL:
8412 case ISD::FFLOOR:
8413 case ISD::FNEARBYINT:
8414 case ISD::FRINT:
8415 case ISD::FROUND:
8416 case ISD::FROUNDEVEN:
8417 if (isPromotedOpNeedingSplit(Op, Subtarget))
8418 return SplitVectorOp(Op, DAG);
8419 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8420 case ISD::LRINT:
8421 case ISD::LLRINT:
8422 case ISD::LROUND:
8423 case ISD::LLROUND: {
8424 if (Op.getValueType().isVector())
8425 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
8426 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
8427 "Unexpected custom legalisation");
8428 SDLoc DL(Op);
8429 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8430 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
8431 }
8432 case ISD::STRICT_LRINT:
8433 case ISD::STRICT_LLRINT:
8434 case ISD::STRICT_LROUND:
8435 case ISD::STRICT_LLROUND: {
8436 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
8437 "Unexpected custom legalisation");
8438 SDLoc DL(Op);
8439 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
8440 {Op.getOperand(0), Op.getOperand(1)});
8441 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
8442 {Ext.getValue(1), Ext.getValue(0)});
8443 }
8444 case ISD::VECREDUCE_ADD:
8449 return lowerVECREDUCE(Op, DAG);
8450 case ISD::VECREDUCE_AND:
8451 case ISD::VECREDUCE_OR:
8452 case ISD::VECREDUCE_XOR:
8453 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8454 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
8455 return lowerVECREDUCE(Op, DAG);
8462 return lowerFPVECREDUCE(Op, DAG);
8463 case ISD::VP_REDUCE_ADD:
8464 case ISD::VP_REDUCE_UMAX:
8465 case ISD::VP_REDUCE_SMAX:
8466 case ISD::VP_REDUCE_UMIN:
8467 case ISD::VP_REDUCE_SMIN:
8468 case ISD::VP_REDUCE_FADD:
8469 case ISD::VP_REDUCE_SEQ_FADD:
8470 case ISD::VP_REDUCE_FMIN:
8471 case ISD::VP_REDUCE_FMAX:
8472 case ISD::VP_REDUCE_FMINIMUM:
8473 case ISD::VP_REDUCE_FMAXIMUM:
8474 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
8475 return SplitVectorReductionOp(Op, DAG);
8476 return lowerVPREDUCE(Op, DAG);
8477 case ISD::VP_REDUCE_AND:
8478 case ISD::VP_REDUCE_OR:
8479 case ISD::VP_REDUCE_XOR:
8480 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
8481 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
8482 return lowerVPREDUCE(Op, DAG);
8483 case ISD::VP_CTTZ_ELTS:
8484 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
8485 return lowerVPCttzElements(Op, DAG);
8486 case ISD::UNDEF: {
8487 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
8488 return convertFromScalableVector(Op.getSimpleValueType(),
8489 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
8490 }
8492 return lowerINSERT_SUBVECTOR(Op, DAG);
8494 return lowerEXTRACT_SUBVECTOR(Op, DAG);
8496 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
8498 return lowerVECTOR_INTERLEAVE(Op, DAG);
8499 case ISD::STEP_VECTOR:
8500 return lowerSTEP_VECTOR(Op, DAG);
8502 return lowerVECTOR_REVERSE(Op, DAG);
8505 return lowerVECTOR_SPLICE(Op, DAG);
8506 case ISD::BUILD_VECTOR: {
8507 MVT VT = Op.getSimpleValueType();
8508 MVT EltVT = VT.getVectorElementType();
8509 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8510 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8511 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8512 }
8513 case ISD::SPLAT_VECTOR: {
8514 MVT VT = Op.getSimpleValueType();
8515 MVT EltVT = VT.getVectorElementType();
8516 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8517 EltVT == MVT::bf16) {
8518 SDLoc DL(Op);
8519 SDValue Elt;
8520 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8521 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8522 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8523 Op.getOperand(0));
8524 else
8525 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8526 MVT IVT = VT.changeVectorElementType(MVT::i16);
8527 return DAG.getNode(ISD::BITCAST, DL, VT,
8528 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8529 }
8530
8531 if (EltVT == MVT::i1)
8532 return lowerVectorMaskSplat(Op, DAG);
8533 return SDValue();
8534 }
8536 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8537 case ISD::CONCAT_VECTORS: {
8538 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8539 // better than going through the stack, as the default expansion does.
8540 SDLoc DL(Op);
8541 MVT VT = Op.getSimpleValueType();
8542 MVT ContainerVT = VT;
8543 if (VT.isFixedLengthVector())
8544 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8545
8546 // Recursively split concat_vectors with more than 2 operands:
8547 //
8548 // concat_vector op1, op2, op3, op4
8549 // ->
8550 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8551 //
8552 // This reduces the length of the chain of vslideups and allows us to
8553 // perform the vslideups at a smaller LMUL, limited to MF2.
8554 if (Op.getNumOperands() > 2 &&
8555 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8556 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8557 assert(isPowerOf2_32(Op.getNumOperands()));
8558 size_t HalfNumOps = Op.getNumOperands() / 2;
8559 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8560 Op->ops().take_front(HalfNumOps));
8561 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8562 Op->ops().drop_front(HalfNumOps));
8563 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8564 }
8565
8566 unsigned NumOpElts =
8567 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8568 SDValue Vec = DAG.getUNDEF(VT);
8569 for (const auto &OpIdx : enumerate(Op->ops())) {
8570 SDValue SubVec = OpIdx.value();
8571 // Don't insert undef subvectors.
8572 if (SubVec.isUndef())
8573 continue;
8574 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8575 }
8576 return Vec;
8577 }
8578 case ISD::LOAD: {
8579 auto *Load = cast<LoadSDNode>(Op);
8580 EVT VT = Load->getValueType(0);
8581 if (VT == MVT::f64) {
8582 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8583 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8584
8585 // Replace a double precision load with two i32 loads and a BuildPairF64.
8586 SDLoc DL(Op);
8587 SDValue BasePtr = Load->getBasePtr();
8588 SDValue Chain = Load->getChain();
8589
8590 SDValue Lo =
8591 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8592 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8593 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8594 SDValue Hi = DAG.getLoad(
8595 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8596 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8597 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8598 Hi.getValue(1));
8599
8600 // For big-endian, swap the order of Lo and Hi.
8601 if (!Subtarget.isLittleEndian())
8602 std::swap(Lo, Hi);
8603
8604 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8605 return DAG.getMergeValues({Pair, Chain}, DL);
8606 }
8607
8608 if (VT == MVT::bf16)
8609 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8610
8611 // Handle normal vector tuple load.
8612 if (VT.isRISCVVectorTuple()) {
8613 SDLoc DL(Op);
8614 MVT XLenVT = Subtarget.getXLenVT();
8615 unsigned NF = VT.getRISCVVectorTupleNumFields();
8616 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8617 unsigned NumElts = Sz / (NF * 8);
8618 int Log2LMUL = Log2_64(NumElts) - 3;
8619
8620 auto Flag = SDNodeFlags();
8621 Flag.setNoUnsignedWrap(true);
8622 SDValue Ret = DAG.getUNDEF(VT);
8623 SDValue BasePtr = Load->getBasePtr();
8624 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8625 VROffset =
8626 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8627 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8628 SmallVector<SDValue, 8> OutChains;
8629
8630 // Load NF vector registers and combine them to a vector tuple.
8631 for (unsigned i = 0; i < NF; ++i) {
8632 SDValue LoadVal = DAG.getLoad(
8633 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8634 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8635 OutChains.push_back(LoadVal.getValue(1));
8636 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8637 DAG.getTargetConstant(i, DL, MVT::i32));
8638 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8639 }
8640 return DAG.getMergeValues(
8641 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8642 }
8643
8644 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8645 return V;
8646 if (Op.getValueType().isFixedLengthVector())
8647 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8648 return Op;
8649 }
8650 case ISD::STORE: {
8651 auto *Store = cast<StoreSDNode>(Op);
8652 SDValue StoredVal = Store->getValue();
8653 EVT VT = StoredVal.getValueType();
8654 if (Subtarget.hasStdExtP()) {
8655 if (VT == MVT::v2i16 || VT == MVT::v4i8) {
8656 SDValue DL(Op);
8657 SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
8658 SDValue NewStore =
8659 DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
8660 Store->getPointerInfo(), Store->getBaseAlign(),
8661 Store->getMemOperand()->getFlags());
8662 return NewStore;
8663 }
8664 }
8665 if (VT == MVT::f64) {
8666 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8667 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8668
8669 // Replace a double precision store with a SplitF64 and i32 stores.
8670 SDValue DL(Op);
8671 SDValue BasePtr = Store->getBasePtr();
8672 SDValue Chain = Store->getChain();
8673 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8674 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8675
8676 SDValue Lo = Split.getValue(0);
8677 SDValue Hi = Split.getValue(1);
8678
8679 // For big-endian, swap the order of Lo and Hi before storing.
8680 if (!Subtarget.isLittleEndian())
8681 std::swap(Lo, Hi);
8682
8683 SDValue LoStore = DAG.getStore(
8684 Chain, DL, Lo, BasePtr, Store->getPointerInfo(),
8685 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8686 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8687 SDValue HiStore = DAG.getStore(
8688 Chain, DL, Hi, BasePtr, Store->getPointerInfo().getWithOffset(4),
8689 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8690 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
8691 }
8692 if (VT == MVT::i64) {
8693 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8694 "Unexpected custom legalisation");
8695 if (Store->isTruncatingStore())
8696 return SDValue();
8697
8698 if (Store->getAlign() < Subtarget.getZilsdAlign())
8699 return SDValue();
8700
8701 SDLoc DL(Op);
8702 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8703 DAG.getTargetConstant(0, DL, MVT::i32));
8704 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8705 DAG.getTargetConstant(1, DL, MVT::i32));
8706
8707 return DAG.getMemIntrinsicNode(
8708 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8709 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8710 Store->getMemOperand());
8711 }
8712
8713 if (VT == MVT::bf16)
8714 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8715
8716 // Handle normal vector tuple store.
8717 if (VT.isRISCVVectorTuple()) {
8718 SDLoc DL(Op);
8719 MVT XLenVT = Subtarget.getXLenVT();
8720 unsigned NF = VT.getRISCVVectorTupleNumFields();
8721 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8722 unsigned NumElts = Sz / (NF * 8);
8723 int Log2LMUL = Log2_64(NumElts) - 3;
8724
8725 auto Flag = SDNodeFlags();
8726 Flag.setNoUnsignedWrap(true);
8727 SDValue Ret;
8728 SDValue Chain = Store->getChain();
8729 SDValue BasePtr = Store->getBasePtr();
8730 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8731 VROffset =
8732 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8733 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8734
8735 // Extract subregisters in a vector tuple and store them individually.
8736 for (unsigned i = 0; i < NF; ++i) {
8737 auto Extract =
8738 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8739 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8740 DAG.getTargetConstant(i, DL, MVT::i32));
8741 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8742 MachinePointerInfo(Store->getAddressSpace()),
8743 Store->getBaseAlign(),
8744 Store->getMemOperand()->getFlags());
8745 Chain = Ret.getValue(0);
8746 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8747 }
8748 return Ret;
8749 }
8750
8751 if (auto V = expandUnalignedRVVStore(Op, DAG))
8752 return V;
8753 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8754 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8755 return Op;
8756 }
8757 case ISD::VP_LOAD:
8758 if (SDValue V = expandUnalignedVPLoad(Op, DAG))
8759 return V;
8760 [[fallthrough]];
8761 case ISD::MLOAD:
8762 return lowerMaskedLoad(Op, DAG);
8763 case ISD::VP_LOAD_FF:
8764 return lowerLoadFF(Op, DAG);
8765 case ISD::VP_STORE:
8766 if (SDValue V = expandUnalignedVPStore(Op, DAG))
8767 return V;
8768 [[fallthrough]];
8769 case ISD::MSTORE:
8770 return lowerMaskedStore(Op, DAG);
8772 return lowerVectorCompress(Op, DAG);
8773 case ISD::SELECT_CC: {
8774 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8775 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8776 // into separate SETCC+SELECT just like LegalizeDAG.
8777 SDValue Tmp1 = Op.getOperand(0);
8778 SDValue Tmp2 = Op.getOperand(1);
8779 SDValue True = Op.getOperand(2);
8780 SDValue False = Op.getOperand(3);
8781 EVT VT = Op.getValueType();
8782 SDValue CC = Op.getOperand(4);
8783 EVT CmpVT = Tmp1.getValueType();
8784 EVT CCVT =
8785 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8786 SDLoc DL(Op);
8787 SDValue Cond =
8788 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8789 return DAG.getSelect(DL, VT, Cond, True, False);
8790 }
8791 case ISD::SETCC: {
8792 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8793 if (OpVT.isScalarInteger()) {
8794 MVT VT = Op.getSimpleValueType();
8795 SDValue LHS = Op.getOperand(0);
8796 SDValue RHS = Op.getOperand(1);
8797 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8798 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8799 "Unexpected CondCode");
8800
8801 SDLoc DL(Op);
8802
8803 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8804 // convert this to the equivalent of (set(u)ge X, C+1) by using
8805 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8806 // in a register.
8807 if (isa<ConstantSDNode>(RHS)) {
8808 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8809 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8810 // If this is an unsigned compare and the constant is -1, incrementing
8811 // the constant would change behavior. The result should be false.
8812 if (CCVal == ISD::SETUGT && Imm == -1)
8813 return DAG.getConstant(0, DL, VT);
8814 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8815 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8816 SDValue SetCC = DAG.getSetCC(
8817 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8818 return DAG.getLogicalNOT(DL, SetCC, VT);
8819 }
8820 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8821 if (CCVal == ISD::SETUGT && Imm == 2047) {
8822 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8823 DAG.getShiftAmountConstant(11, OpVT, DL));
8824 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8825 ISD::SETNE);
8826 }
8827 }
8828
8829 // Not a constant we could handle, swap the operands and condition code to
8830 // SETLT/SETULT.
8831 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8832 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8833 }
8834
8835 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8836 return SplitVectorOp(Op, DAG);
8837
8838 return lowerToScalableOp(Op, DAG);
8839 }
8840 case ISD::ADD:
8841 case ISD::SUB:
8842 case ISD::MUL:
8843 case ISD::MULHS:
8844 case ISD::MULHU:
8845 case ISD::AND:
8846 case ISD::OR:
8847 case ISD::XOR:
8848 case ISD::SDIV:
8849 case ISD::SREM:
8850 case ISD::UDIV:
8851 case ISD::UREM:
8852 case ISD::BSWAP:
8853 case ISD::CTPOP:
8854 case ISD::VSELECT:
8855 return lowerToScalableOp(Op, DAG);
8856 case ISD::SHL:
8857 case ISD::SRL:
8858 case ISD::SRA:
8859 if (Op.getSimpleValueType().isFixedLengthVector()) {
8860 if (Subtarget.hasStdExtP()) {
8861 SDValue ShAmtVec = Op.getOperand(1);
8862 SDValue SplatVal;
8863 if (ShAmtVec.getOpcode() == ISD::SPLAT_VECTOR)
8864 SplatVal = ShAmtVec.getOperand(0);
8865 else if (ShAmtVec.getOpcode() == ISD::BUILD_VECTOR)
8866 SplatVal = cast<BuildVectorSDNode>(ShAmtVec)->getSplatValue();
8867
8868 if (!SplatVal)
8869 return DAG.UnrollVectorOp(Op.getNode());
8870
8871 unsigned Opc;
8872 switch (Op.getOpcode()) {
8873 default:
8874 llvm_unreachable("Unexpected opcode");
8875 case ISD::SHL:
8876 Opc = RISCVISD::PSHL;
8877 break;
8878 case ISD::SRL:
8879 Opc = RISCVISD::PSRL;
8880 break;
8881 case ISD::SRA:
8882 Opc = RISCVISD::PSRA;
8883 break;
8884 }
8885 return DAG.getNode(Opc, SDLoc(Op), Op.getValueType(), Op.getOperand(0),
8886 SplatVal);
8887 }
8888 return lowerToScalableOp(Op, DAG);
8889 }
8890 // This can be called for an i32 shift amount that needs to be promoted.
8891 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8892 "Unexpected custom legalisation");
8893 return SDValue();
8894 case ISD::SSHLSAT: {
8895 MVT VT = Op.getSimpleValueType();
8896 assert(VT.isFixedLengthVector() && Subtarget.hasStdExtP() &&
8897 "Unexptect custom legalisation");
8898 APInt Splat;
8899 if (!ISD::isConstantSplatVector(Op.getOperand(1).getNode(), Splat))
8900 return SDValue();
8901 uint64_t ShAmt = Splat.getZExtValue();
8902 if (ShAmt >= VT.getVectorElementType().getSizeInBits())
8903 return SDValue();
8904 SDLoc DL(Op);
8905 return DAG.getNode(RISCVISD::PSSLAI, DL, VT, Op.getOperand(0),
8906 DAG.getTargetConstant(ShAmt, DL, Subtarget.getXLenVT()));
8907 }
8908 case ISD::FABS:
8909 case ISD::FNEG:
8910 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8911 return lowerFABSorFNEG(Op, DAG, Subtarget);
8912 [[fallthrough]];
8913 case ISD::FADD:
8914 case ISD::FSUB:
8915 case ISD::FMUL:
8916 case ISD::FDIV:
8917 case ISD::FSQRT:
8918 case ISD::FMA:
8919 case ISD::FMINNUM:
8920 case ISD::FMAXNUM:
8921 case ISD::FMINIMUMNUM:
8922 case ISD::FMAXIMUMNUM:
8923 if (isPromotedOpNeedingSplit(Op, Subtarget))
8924 return SplitVectorOp(Op, DAG);
8925 [[fallthrough]];
8926 case ISD::AVGFLOORS:
8927 case ISD::AVGFLOORU:
8928 case ISD::AVGCEILS:
8929 case ISD::AVGCEILU:
8930 case ISD::SMIN:
8931 case ISD::SMAX:
8932 case ISD::UMIN:
8933 case ISD::UMAX:
8934 case ISD::UADDSAT:
8935 case ISD::USUBSAT:
8936 case ISD::SADDSAT:
8937 case ISD::SSUBSAT:
8938 return lowerToScalableOp(Op, DAG);
8939 case ISD::ABDS:
8940 case ISD::ABDU: {
8941 EVT VT = Op->getValueType(0);
8942 // Only SEW=8/16 are supported in Zvabd.
8943 if (Subtarget.hasStdExtZvabd() && VT.isVector() &&
8944 (VT.getVectorElementType() == MVT::i8 ||
8945 VT.getVectorElementType() == MVT::i16))
8946 return lowerToScalableOp(Op, DAG);
8947
8948 SDLoc dl(Op);
8949 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8950 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8951 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8952
8953 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8954 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8955 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8956 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8957 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8958 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8959 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8960 }
8961 case ISD::ABS:
8962 case ISD::VP_ABS:
8963 return lowerABS(Op, DAG);
8964 case ISD::CTLZ:
8966 case ISD::CTTZ:
8968 if (Subtarget.hasStdExtZvbb())
8969 return lowerToScalableOp(Op, DAG);
8970 assert(Op.getOpcode() != ISD::CTTZ);
8971 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8972 case ISD::CLMUL: {
8973 MVT VT = Op.getSimpleValueType();
8974 assert(VT.isScalableVector() && Subtarget.hasStdExtZvbc() &&
8975 "Unexpected custom legalisation");
8976 // Promote to i64 vector.
8977 MVT I64VecVT = VT.changeVectorElementType(MVT::i64);
8978 SDLoc DL(Op);
8979 SDValue Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, I64VecVT, Op.getOperand(0));
8980 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, I64VecVT, Op.getOperand(1));
8981 SDValue CLMUL = DAG.getNode(ISD::CLMUL, DL, I64VecVT, Op0, Op1);
8982 return DAG.getNode(ISD::TRUNCATE, DL, VT, CLMUL);
8983 }
8984 case ISD::FCOPYSIGN:
8985 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8986 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8987 if (isPromotedOpNeedingSplit(Op, Subtarget))
8988 return SplitVectorOp(Op, DAG);
8989 return lowerToScalableOp(Op, DAG);
8990 case ISD::STRICT_FADD:
8991 case ISD::STRICT_FSUB:
8992 case ISD::STRICT_FMUL:
8993 case ISD::STRICT_FDIV:
8994 case ISD::STRICT_FSQRT:
8995 case ISD::STRICT_FMA:
8996 if (isPromotedOpNeedingSplit(Op, Subtarget))
8997 return SplitStrictFPVectorOp(Op, DAG);
8998 return lowerToScalableOp(Op, DAG);
8999 case ISD::STRICT_FSETCC:
9001 return lowerVectorStrictFSetcc(Op, DAG);
9002 case ISD::STRICT_FCEIL:
9003 case ISD::STRICT_FRINT:
9004 case ISD::STRICT_FFLOOR:
9005 case ISD::STRICT_FTRUNC:
9007 case ISD::STRICT_FROUND:
9009 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
9010 case ISD::MGATHER:
9011 case ISD::VP_GATHER:
9012 return lowerMaskedGather(Op, DAG);
9013 case ISD::MSCATTER:
9014 case ISD::VP_SCATTER:
9015 return lowerMaskedScatter(Op, DAG);
9016 case ISD::GET_ROUNDING:
9017 return lowerGET_ROUNDING(Op, DAG);
9018 case ISD::SET_ROUNDING:
9019 return lowerSET_ROUNDING(Op, DAG);
9020 case ISD::GET_FPENV:
9021 return lowerGET_FPENV(Op, DAG);
9022 case ISD::SET_FPENV:
9023 return lowerSET_FPENV(Op, DAG);
9024 case ISD::RESET_FPENV:
9025 return lowerRESET_FPENV(Op, DAG);
9026 case ISD::GET_FPMODE:
9027 return lowerGET_FPMODE(Op, DAG);
9028 case ISD::SET_FPMODE:
9029 return lowerSET_FPMODE(Op, DAG);
9030 case ISD::RESET_FPMODE:
9031 return lowerRESET_FPMODE(Op, DAG);
9032 case ISD::EH_DWARF_CFA:
9033 return lowerEH_DWARF_CFA(Op, DAG);
9034 case ISD::VP_MERGE:
9035 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
9036 return lowerVPMergeMask(Op, DAG);
9037 [[fallthrough]];
9038 case ISD::VP_SELECT:
9039 case ISD::VP_ADD:
9040 case ISD::VP_SUB:
9041 case ISD::VP_MUL:
9042 case ISD::VP_SDIV:
9043 case ISD::VP_UDIV:
9044 case ISD::VP_SREM:
9045 case ISD::VP_UREM:
9046 case ISD::VP_UADDSAT:
9047 case ISD::VP_USUBSAT:
9048 case ISD::VP_SADDSAT:
9049 case ISD::VP_SSUBSAT:
9050 case ISD::VP_LRINT:
9051 case ISD::VP_LLRINT:
9052 return lowerVPOp(Op, DAG);
9053 case ISD::VP_AND:
9054 case ISD::VP_OR:
9055 case ISD::VP_XOR:
9056 return lowerLogicVPOp(Op, DAG);
9057 case ISD::VP_FADD:
9058 case ISD::VP_FSUB:
9059 case ISD::VP_FMUL:
9060 case ISD::VP_FDIV:
9061 case ISD::VP_FNEG:
9062 case ISD::VP_FABS:
9063 case ISD::VP_SQRT:
9064 case ISD::VP_FMA:
9065 case ISD::VP_FMINNUM:
9066 case ISD::VP_FMAXNUM:
9067 case ISD::VP_FCOPYSIGN:
9068 if (isPromotedOpNeedingSplit(Op, Subtarget))
9069 return SplitVPOp(Op, DAG);
9070 [[fallthrough]];
9071 case ISD::VP_SRA:
9072 case ISD::VP_SRL:
9073 case ISD::VP_SHL:
9074 return lowerVPOp(Op, DAG);
9075 case ISD::VP_IS_FPCLASS:
9076 return LowerIS_FPCLASS(Op, DAG);
9077 case ISD::VP_SIGN_EXTEND:
9078 case ISD::VP_ZERO_EXTEND:
9079 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
9080 return lowerVPExtMaskOp(Op, DAG);
9081 return lowerVPOp(Op, DAG);
9082 case ISD::VP_TRUNCATE:
9083 return lowerVectorTruncLike(Op, DAG);
9084 case ISD::VP_FP_EXTEND:
9085 case ISD::VP_FP_ROUND:
9086 return lowerVectorFPExtendOrRoundLike(Op, DAG);
9087 case ISD::VP_SINT_TO_FP:
9088 case ISD::VP_UINT_TO_FP:
9089 if (Op.getValueType().isVector() &&
9090 ((Op.getValueType().getScalarType() == MVT::f16 &&
9091 (Subtarget.hasVInstructionsF16Minimal() &&
9092 !Subtarget.hasVInstructionsF16())) ||
9093 Op.getValueType().getScalarType() == MVT::bf16)) {
9094 if (isPromotedOpNeedingSplit(Op, Subtarget))
9095 return SplitVectorOp(Op, DAG);
9096 // int -> f32
9097 SDLoc DL(Op);
9098 MVT NVT =
9099 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
9100 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
9101 // f32 -> [b]f16
9102 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
9103 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9104 }
9105 [[fallthrough]];
9106 case ISD::VP_FP_TO_SINT:
9107 case ISD::VP_FP_TO_UINT:
9108 if (SDValue Op1 = Op.getOperand(0);
9109 Op1.getValueType().isVector() &&
9110 ((Op1.getValueType().getScalarType() == MVT::f16 &&
9111 (Subtarget.hasVInstructionsF16Minimal() &&
9112 !Subtarget.hasVInstructionsF16())) ||
9113 Op1.getValueType().getScalarType() == MVT::bf16)) {
9114 if (isPromotedOpNeedingSplit(Op1, Subtarget))
9115 return SplitVectorOp(Op, DAG);
9116 // [b]f16 -> f32
9117 SDLoc DL(Op);
9118 MVT NVT = MVT::getVectorVT(MVT::f32,
9119 Op1.getValueType().getVectorElementCount());
9120 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
9121 // f32 -> int
9122 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
9123 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
9124 }
9125 return lowerVPFPIntConvOp(Op, DAG);
9126 case ISD::VP_SETCC:
9127 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
9128 return SplitVPOp(Op, DAG);
9129 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
9130 return lowerVPSetCCMaskOp(Op, DAG);
9131 [[fallthrough]];
9132 case ISD::VP_SMIN:
9133 case ISD::VP_SMAX:
9134 case ISD::VP_UMIN:
9135 case ISD::VP_UMAX:
9136 case ISD::VP_BITREVERSE:
9137 case ISD::VP_BSWAP:
9138 return lowerVPOp(Op, DAG);
9139 case ISD::VP_CTLZ:
9140 case ISD::VP_CTLZ_ZERO_UNDEF:
9141 if (Subtarget.hasStdExtZvbb())
9142 return lowerVPOp(Op, DAG);
9143 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
9144 case ISD::VP_CTTZ:
9145 case ISD::VP_CTTZ_ZERO_UNDEF:
9146 if (Subtarget.hasStdExtZvbb())
9147 return lowerVPOp(Op, DAG);
9148 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
9149 case ISD::VP_CTPOP:
9150 return lowerVPOp(Op, DAG);
9151 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
9152 return lowerVPStridedLoad(Op, DAG);
9153 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
9154 return lowerVPStridedStore(Op, DAG);
9155 case ISD::VP_FCEIL:
9156 case ISD::VP_FFLOOR:
9157 case ISD::VP_FRINT:
9158 case ISD::VP_FNEARBYINT:
9159 case ISD::VP_FROUND:
9160 case ISD::VP_FROUNDEVEN:
9161 case ISD::VP_FROUNDTOZERO:
9162 if (isPromotedOpNeedingSplit(Op, Subtarget))
9163 return SplitVPOp(Op, DAG);
9164 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
9165 case ISD::VP_FMAXIMUM:
9166 case ISD::VP_FMINIMUM:
9167 if (isPromotedOpNeedingSplit(Op, Subtarget))
9168 return SplitVPOp(Op, DAG);
9169 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
9170 case ISD::EXPERIMENTAL_VP_SPLICE:
9171 return lowerVPSpliceExperimental(Op, DAG);
9172 case ISD::EXPERIMENTAL_VP_REVERSE:
9173 return lowerVPReverseExperimental(Op, DAG);
9174 case ISD::CLEAR_CACHE: {
9175 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
9176 "llvm.clear_cache only needs custom lower on Linux targets");
9177 SDLoc DL(Op);
9178 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9179 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
9180 Op.getOperand(2), Flags, DL);
9181 }
9183 return lowerDYNAMIC_STACKALLOC(Op, DAG);
9185 return lowerINIT_TRAMPOLINE(Op, DAG);
9187 return lowerADJUST_TRAMPOLINE(Op, DAG);
9191 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
9192 }
9193}
9194
9195SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
9196 SDValue Start, SDValue End,
9197 SDValue Flags, SDLoc DL) const {
9198 MakeLibCallOptions CallOptions;
9199 std::pair<SDValue, SDValue> CallResult =
9200 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
9201 {Start, End, Flags}, CallOptions, DL, InChain);
9202
9203 // This function returns void so only the out chain matters.
9204 return CallResult.second;
9205}
9206
// Lower ISD::INIT_TRAMPOLINE: write a small code+data stub into the
// caller-provided buffer so that calling the buffer invokes the nested
// function with the static chain loaded into t2/X7. Only implemented for
// RV64 (the stub uses LD and 8-byte slots). Returns the chain after the
// stores plus a CLEAR_CACHE node covering the instruction bytes.
9207SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
9208                                                  SelectionDAG &DAG) const {
9209  if (!Subtarget.is64Bit())
9210    llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9211
9212  // Create an MCCodeEmitter to encode instructions.
9213  TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
9214  assert(TLO);
9215  MCContext &MCCtx = TLO->getContext();
9216
9217  std::unique_ptr<MCCodeEmitter> CodeEmitter(
9218      createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
9219
9220  SDValue Root = Op.getOperand(0);
9221  SDValue Trmp = Op.getOperand(1); // trampoline
9222  SDLoc dl(Op);
9223
9224  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
9225
9226  // We store in the trampoline buffer the following instructions and data.
9227  // Offset:
9228  // 0: auipc t2, 0
9229  // 4: ld t0, 24(t2)
9230  // 8: ld t2, 16(t2)
9231  // 12: jalr t0
9232  // 16: <StaticChainOffset>
9233  // 24: <FunctionAddressOffset>
9234  // 32:
9235  // Offset with branch control flow protection enabled:
9236  // 0: lpad <imm20>
9237  // 4: auipc t3, 0
9238  // 8: ld t2, 28(t3)
9239  // 12: ld t3, 20(t3)
9240  // 16: jalr t2
9241  // 20: <StaticChainOffset>
9242  // 28: <FunctionAddressOffset>
9243  // 36:
9244
9245  const bool HasCFBranch =
9246      Subtarget.hasStdExtZicfilp() &&
      // NOTE(review): the elided operand here presumably queries the current
      // function's "cf-protection-branch" attribute — confirm upstream.
9248                                       "cf-protection-branch");
  // With Zicfilp an extra lpad landing-pad word is emitted first, shifting
  // the data slots by one word.
9249  const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
9250  const unsigned StaticChainOffset = StaticChainIdx * 4;
9251  const unsigned FunctionAddressOffset = StaticChainOffset + 8;
9252
9253  const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
9254  assert(STI);
  // Encode one MCInst and return its 32-bit little-endian instruction word.
  // (The local byte/fixup buffers passed to encodeInstruction were declared
  // on lines elided from this extract.)
9255  auto GetEncoding = [&](const MCInst &MC) {
9258    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
9259    uint32_t Encoding = support::endian::read32le(CB.data());
9260    return Encoding;
9261  };
9262
9263  SmallVector<SDValue> OutChains;
9264
9265  SmallVector<uint32_t> Encodings;
9266  if (!HasCFBranch) {
9267    Encodings.append(
9268        {// auipc t2, 0
9269         // Loads the current PC into t2.
9270         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
9271         // ld t0, 24(t2)
9272         // Loads the function address into t0. Note that we are using offsets
9273         // pc-relative to the first instruction of the trampoline.
9274         GetEncoding(MCInstBuilder(RISCV::LD)
9275                         .addReg(RISCV::X5)
9276                         .addReg(RISCV::X7)
9277                         .addImm(FunctionAddressOffset)),
9278         // ld t2, 16(t2)
9279         // Load the value of the static chain.
9280         GetEncoding(MCInstBuilder(RISCV::LD)
9281                         .addReg(RISCV::X7)
9282                         .addReg(RISCV::X7)
9283                         .addImm(StaticChainOffset)),
9284         // jalr t0
9285         // Jump to the function.
9286         GetEncoding(MCInstBuilder(RISCV::JALR)
9287                         .addReg(RISCV::X0)
9288                         .addReg(RISCV::X5)
9289                         .addImm(0))});
9290  } else {
9291    Encodings.append(
9292        {// auipc x0, <imm20> (lpad <imm20>)
9293         // Landing pad.
9294         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
9295         // auipc t3, 0
9296         // Loads the current PC into t3.
9297         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
9298         // ld t2, (FunctionAddressOffset - 4)(t3)
9299         // Loads the function address into t2. Note that we are using offsets
9300         // pc-relative to the SECOND instruction of the trampoline.
9301         GetEncoding(MCInstBuilder(RISCV::LD)
9302                         .addReg(RISCV::X7)
9303                         .addReg(RISCV::X28)
9304                         .addImm(FunctionAddressOffset - 4)),
9305         // ld t3, (StaticChainOffset - 4)(t3)
9306         // Load the value of the static chain.
9307         GetEncoding(MCInstBuilder(RISCV::LD)
9308                         .addReg(RISCV::X28)
9309                         .addReg(RISCV::X28)
9310                         .addImm(StaticChainOffset - 4)),
9311         // jalr t2
9312         // Software-guarded jump to the function.
9313         GetEncoding(MCInstBuilder(RISCV::JALR)
9314                         .addReg(RISCV::X0)
9315                         .addReg(RISCV::X7)
9316                         .addImm(0))});
9317  }
9318
9319  // Store encoded instructions.
  // Each 32-bit word is truncstored at Trmp + 4*Idx.
9320  for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
9321    SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9322                                         DAG.getConstant(Idx * 4, dl, MVT::i64))
9323                           : Trmp;
9324    OutChains.push_back(DAG.getTruncStore(
9325        Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
9326        MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
9327  }
9328
9329  // Now store the variable part of the trampoline.
9330  SDValue FunctionAddress = Op.getOperand(2);
9331  SDValue StaticChain = Op.getOperand(3);
9332
9333  // Store the given static chain and function pointer in the trampoline buffer.
9334  struct OffsetValuePair {
9335    const unsigned Offset;
9336    const SDValue Value;
9337    SDValue Addr = SDValue(); // Used to cache the address.
9338  } OffsetValues[] = {
9339      {StaticChainOffset, StaticChain},
9340      {FunctionAddressOffset, FunctionAddress},
9341  };
9342  for (auto &OffsetValue : OffsetValues) {
9343    SDValue Addr =
9344        DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9345                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
9346    OffsetValue.Addr = Addr;
9347    OutChains.push_back(
9348        DAG.getStore(Root, dl, OffsetValue.Value, Addr,
9349                     MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
9350  }
9351
  // StaticChainIdx instruction words + 2 data stores.
9352  assert(OutChains.size() == StaticChainIdx + 2 &&
9353         "Size of OutChains mismatch");
9354  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
9355
9356  // The end of instructions of trampoline is the same as the static chain
9357  // address that we computed earlier.
9358  SDValue EndOfTrmp = OffsetValues[0].Addr;
9359
9360  // Call clear cache on the trampoline instructions.
9361  SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
9362                              Trmp, EndOfTrmp);
9363
9364  return Chain;
9365}
9366
9367SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
9368 SelectionDAG &DAG) const {
9369 if (!Subtarget.is64Bit())
9370 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9371
9372 return Op.getOperand(0);
9373}
9374
// Lower a PARTIAL_REDUCE_*MLA node (i32 accumulator, i8 multiplicands) to the
// corresponding VL-predicated zvdot4a8i dot-product node, converting fixed
// vectors through the scalable container type as needed.
9375SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
9376                                                      SelectionDAG &DAG) const {
9377  // Currently, only the vdota4 and vdota4u case (from zvdot4a8i) should be
9378  // legal.
9379  // TODO: There are many other sub-cases we could potentially lower, are
9380  // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
9381  SDLoc DL(Op);
9382  MVT VT = Op.getSimpleValueType();
9383  SDValue Accum = Op.getOperand(0);
9384  assert(Accum.getSimpleValueType() == VT &&
9385         VT.getVectorElementType() == MVT::i32);
9386  SDValue A = Op.getOperand(1);
9387  SDValue B = Op.getOperand(2);
9388  MVT ArgVT = A.getSimpleValueType();
9389  assert(ArgVT == B.getSimpleValueType() &&
9390         ArgVT.getVectorElementType() == MVT::i8);
  // ArgVT is only used by the assertion above.
9391  (void)ArgVT;
9392
9393  // The zvdot4a8i pseudos are defined with sources and destination both
9394  // being i32. This cast is needed for correctness to avoid incorrect
9395  // .vx matching of i8 splats.
9396  A = DAG.getBitcast(VT, A);
9397  B = DAG.getBitcast(VT, B);
9398
9399  MVT ContainerVT = VT;
9400  if (VT.isFixedLengthVector()) {
9401    ContainerVT = getContainerForFixedLengthVector(VT);
9402    Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
9403    A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
9404    B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
9405  }
9406
  // Map the generic partial-reduce opcode to the matching RISCVISD dot node.
  // NOTE(review): the case labels were elided in this extract; presumably
  // the signed/unsigned/signed-unsigned PARTIAL_REDUCE MLA opcodes — confirm.
9407  unsigned Opc;
9408  switch (Op.getOpcode()) {
9410    Opc = RISCVISD::VDOTA4_VL;
9411    break;
9413    Opc = RISCVISD::VDOTA4U_VL;
9414    break;
9416    Opc = RISCVISD::VDOTA4SU_VL;
9417    break;
9418  default:
9419    llvm_unreachable("Unexpected opcode");
9420  }
9421  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9422  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
9423  if (VT.isFixedLengthVector())
9424    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
9425  return Res;
9426}
9427
// getTargetNode overload: wrap a global-address node as a TargetGlobalAddress
// carrying the given relocation flags. (The first signature line was elided
// in this extract.)
9429                            SelectionDAG &DAG, unsigned Flags) {
9430  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
9431}
9432
// getTargetNode overload: wrap a block-address node, preserving its offset and
// attaching the given relocation flags.
9434                            SelectionDAG &DAG, unsigned Flags) {
9435  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
9436                                   Flags);
9437}
9438
// getTargetNode overload: wrap a constant-pool node, preserving alignment and
// offset and attaching the given relocation flags.
9440                            SelectionDAG &DAG, unsigned Flags) {
9441  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9442                                   N->getOffset(), Flags);
9443}
9444
// getTargetNode overload: wrap a jump-table node with the given flags.
9446                            SelectionDAG &DAG, unsigned Flags) {
9447  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
9448}
9449
// Large code model: place the symbol's address in the constant pool and load
// it via an LLA-addressed pool entry (8-byte aligned slot).
// NOTE(review): the RISCVConstantPoolValue creation and the
// MachinePointerInfo argument of getLoad were elided in this extract.
9451                                    EVT Ty, SelectionDAG &DAG) {
9453  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9454  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9455  return DAG.getLoad(
9456      Ty, DL, DAG.getEntryNode(), LC,
9458}
9459
// Large code model: same constant-pool indirection as getLargeGlobalAddress,
// but keyed on an external symbol name instead of a GlobalValue.
9461                                      EVT Ty, SelectionDAG &DAG) {
9463      RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
9464  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9465  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9466  return DAG.getLoad(
9467      Ty, DL, DAG.getEntryNode(), LC,
9469}
9470
// Shared address-lowering helper for global addresses, block addresses,
// constant pools and jump tables. Strategy depends on PIC-ness, HWASAN
// tagged-globals mode and the code model; IsLocal/IsExternWeak refine the
// choice for symbols (GOT-indirect for non-local or extern-weak).
9471template <class NodeTy>
9472SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
9473                                     bool IsLocal, bool IsExternWeak) const {
9474  SDLoc DL(N);
9475  EVT Ty = getPointerTy(DAG.getDataLayout());
9476
9477  // When HWASAN is used and tagging of global variables is enabled
9478  // they should be accessed via the GOT, since the tagged address of a global
9479  // is incompatible with existing code models. This also applies to non-pic
9480  // mode.
9481  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
9482    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9483    if (IsLocal && !Subtarget.allowTaggedGlobals())
9484      // Use PC-relative addressing to access the symbol. This generates the
9485      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
9486      // %pcrel_lo(auipc)).
9487      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9488
9489    // Use PC-relative addressing to access the GOT for this symbol, then load
9490    // the address from the GOT. This generates the pattern (PseudoLGA sym),
9491    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9492    SDValue Load =
9493        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9494    MachineFunction &MF = DAG.getMachineFunction();
    // Attach a memory operand describing the GOT load so later passes know
    // its memory behavior. NOTE(review): the pointer-info/flags arguments
    // were elided in this extract.
9495    MachineMemOperand *MemOp = MF.getMachineMemOperand(
9499        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9500    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9501    return Load;
9502  }
9503
9504  switch (getTargetMachine().getCodeModel()) {
9505  default:
9506    reportFatalUsageError("Unsupported code model for lowering");
9507  case CodeModel::Small: {
9508    // Generate a sequence for accessing addresses within the first 2 GiB of
9509    // address space.
9510    if (Subtarget.hasVendorXqcili()) {
9511      // Use QC.E.LI to generate the address, as this is easier to relax than
9512      // LUI/ADDI.
9513      SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9514      return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
9515    }
9516
9517    // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
9518    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
9519    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
9520    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9521    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
9522  }
9523  case CodeModel::Medium: {
9524    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9525    if (IsExternWeak) {
9526      // An extern weak symbol may be undefined, i.e. have value 0, which may
9527      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
9528      // symbol. This generates the pattern (PseudoLGA sym), which expands to
9529      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9530      SDValue Load =
9531          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9532      MachineFunction &MF = DAG.getMachineFunction();
      // Same GOT-load memory operand as in the PIC path above.
9533      MachineMemOperand *MemOp = MF.getMachineMemOperand(
9537          LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9538      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9539      return Load;
9540    }
9541
9542    // Generate a sequence for accessing addresses within any 2GiB range within
9543    // the address space. This generates the pattern (PseudoLLA sym), which
9544    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
9545    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9546  }
9547  case CodeModel::Large: {
9548    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
9549      return getLargeGlobalAddress(G, DL, Ty, DAG);
9550
9551    // Using pc-relative mode for other node type.
9552    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9553    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9554  }
9555  }
9556}
9557
9558SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
9559 SelectionDAG &DAG) const {
9560 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9561 assert(N->getOffset() == 0 && "unexpected offset in global node");
9562 const GlobalValue *GV = N->getGlobal();
9563 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(GV);
9564 return getAddr(N, DAG, IsLocal, GV->hasExternalWeakLinkage());
9565}
9566
9567SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
9568 SelectionDAG &DAG) const {
9569 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
9570
9571 return getAddr(N, DAG);
9572}
9573
9574SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
9575 SelectionDAG &DAG) const {
9576 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
9577
9578 return getAddr(N, DAG);
9579}
9580
9581SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
9582 SelectionDAG &DAG) const {
9583 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
9584
9585 return getAddr(N, DAG);
9586}
9587
// Lower a static-model TLS access. With UseGOT, emit the initial-exec
// sequence (PseudoLA_TLS_IE load from the GOT plus tp add); otherwise emit
// the local-exec lui/add-tprel/addi sequence relative to tp (X4).
9588SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
9589                                              SelectionDAG &DAG,
9590                                              bool UseGOT) const {
9591  SDLoc DL(N);
9592  EVT Ty = getPointerTy(DAG.getDataLayout());
9593  const GlobalValue *GV = N->getGlobal();
9594  MVT XLenVT = Subtarget.getXLenVT();
9595
9596  if (UseGOT) {
9597    // Use PC-relative addressing to access the GOT for this TLS symbol, then
9598    // load the address from the GOT and add the thread pointer. This generates
9599    // the pattern (PseudoLA_TLS_IE sym), which expands to
9600    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9601    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9602    SDValue Load =
9603        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9604    MachineFunction &MF = DAG.getMachineFunction();
    // Describe the GOT load's memory access. NOTE(review): the pointer-info
    // and flags arguments were elided in this extract.
9605    MachineMemOperand *MemOp = MF.getMachineMemOperand(
9609        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9610    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9611
9612    // Add the thread pointer.
9613    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9614    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9615  }
9616
9617  // Generate a sequence for accessing the address relative to the thread
9618  // pointer, with the appropriate adjustment for the thread pointer offset.
9619  // This generates the pattern
9620  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // NOTE(review): the three target-global-address operands (presumably with
  // %tprel_hi / %tprel_add / %tprel_lo flags) were elided in this extract.
9621  SDValue AddrHi =
9623  SDValue AddrAdd =
9625  SDValue AddrLo =
9627
9628  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9629  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9630  SDValue MNAdd =
9631      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9632  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9633}
9634
// Lower a general/local-dynamic TLS access: compute the GD GOT slot address
// with PseudoLA_TLS_GD, then call __tls_get_addr on it and return the call's
// result (the variable's address).
9635SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9636                                               SelectionDAG &DAG) const {
9637  SDLoc DL(N);
9638  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
9639  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9640  const GlobalValue *GV = N->getGlobal();
9641
9642  // Use a PC-relative addressing mode to access the global dynamic GOT address.
9643  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9644  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9645  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9646  SDValue Load =
9647      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9648
9649  // Prepare argument list to generate call.
  // (The Args declaration line was elided in this extract.)
9651  Args.emplace_back(Load, CallTy);
9652
9653  // Setup call to __tls_get_addr.
9654  TargetLowering::CallLoweringInfo CLI(DAG);
9655  CLI.setDebugLoc(DL)
9656      .setChain(DAG.getEntryNode())
9657      .setLibCallee(CallingConv::C, CallTy,
9658                    DAG.getExternalSymbol("__tls_get_addr", Ty),
9659                    std::move(Args));
9660
  // first = the call's return value (the TLS address).
9661  return LowerCallTo(CLI).first;
9662}
9663
9664SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9665 SelectionDAG &DAG) const {
9666 SDLoc DL(N);
9667 EVT Ty = getPointerTy(DAG.getDataLayout());
9668 const GlobalValue *GV = N->getGlobal();
9669
9670 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9671 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9672 //
9673 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9674 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9675 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9676 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9677 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9678 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9679}
9680
// Dispatch TLS lowering: emulated TLS goes through the generic emulated
// model; otherwise pick the static (LE/IE), TLSDESC or __tls_get_addr
// sequence based on the TLS model chosen by the target machine.
9681SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9682                                                   SelectionDAG &DAG) const {
9683  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9684  assert(N->getOffset() == 0 && "unexpected offset in global node");
9685
9686  if (DAG.getTarget().useEmulatedTLS())
9687    return LowerToTLSEmulatedModel(N, DAG);
9688
  // NOTE(review): the TLS-model query and the GHC calling-convention check
  // were elided from this extract (only the diagnostic below survives).
9690
9693      reportFatalUsageError("In GHC calling convention TLS is not supported");
9694
9695  SDValue Addr;
  // NOTE(review): case labels elided; presumably the LocalExec / InitialExec
  // and dynamic TLS models, in that order — confirm upstream.
9696  switch (Model) {
9698    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9699    break;
9701    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9702    break;
9705    Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9706                                        : getDynamicTLSAddr(N, DAG);
9707    break;
9708  }
9709
9710  return Addr;
9711}
9712
9713// Return true if Val is equal to (setcc LHS, RHS, CC).
9714// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9715// Otherwise, return std::nullopt.
9716static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9717                                      ISD::CondCode CC, SDValue Val) {
  // Val must itself be a SETCC; pull apart its operands and condition.
9718  assert(Val->getOpcode() == ISD::SETCC);
9719  SDValue LHS2 = Val.getOperand(0);
9720  SDValue RHS2 = Val.getOperand(1);
9721  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9722
  // Same operands in the same order: compare conditions directly.
9723  if (LHS == LHS2 && RHS == RHS2) {
9724    if (CC == CC2)
9725      return true;
9726    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9727      return false;
  // Operands appear swapped in Val. NOTE(review): a line normalizing CC2 for
  // the swapped operand order was elided in this extract — confirm upstream
  // before relying on this branch's comparison.
9728  } else if (LHS == RHS2 && RHS == LHS2) {
9730    if (CC == CC2)
9731      return true;
9732    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9733      return false;
9734  }
9735
  // Neither equal nor provably inverse.
9736  return std::nullopt;
9737}
9738
  // True iff V is a ConstantSDNode whose value fits a signed 12-bit
  // immediate — the range accepted by RISC-V I-type instructions like ADDI.
9740  return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9741}
9742
// Try to lower (select CondV, TrueV, FalseV) into plain bitwise/arithmetic
// ops (OR/AND/XOR/ADD/SHL_ADD of the i1 condition), avoiding a branch or a
// conditional-move. Returns SDValue() when no pattern applies. (The first
// line of the signature was elided in this extract.)
9744                             const RISCVSubtarget &Subtarget) {
9745  SDValue CondV = N->getOperand(0);
9746  SDValue TrueV = N->getOperand(1);
9747  SDValue FalseV = N->getOperand(2);
9748  MVT VT = N->getSimpleValueType(0);
9749  SDLoc DL(N);
9750
  // With conditional-move fusion these expansions are not profitable.
9751  if (!Subtarget.hasConditionalMoveFusion()) {
9752    // (select c, -1, y) -> -c | y
9753    if (isAllOnesConstant(TrueV)) {
9754      SDValue Neg = DAG.getNegative(CondV, DL, VT);
9755      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9756    }
9757    // (select c, y, -1) -> (c-1) | y
9758    if (isAllOnesConstant(FalseV)) {
9759      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9760                                DAG.getAllOnesConstant(DL, VT));
9761      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9762    }
9763
9764    const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9765
9766    // (select c, 0, y) -> (c-1) & y
    // With czero-like instructions available, only do this when the non-zero
    // arm is a simm12 (cheap to rematerialize); otherwise czero wins.
9767    if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9768      SDValue Neg =
9769          DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9770      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9771    }
9772    if (isNullConstant(FalseV)) {
9773      // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9774      if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9775        uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9776        if (isPowerOf2_64(TrueM1)) {
9777          unsigned ShAmount = Log2_64(TrueM1);
9778          if (Subtarget.hasShlAdd(ShAmount))
9779            return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9780                               DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9781        }
9782      }
9783      // (select c, y, 0) -> -c & y
9784      if (!HasCZero || isSimm12Constant(TrueV)) {
9785        SDValue Neg = DAG.getNegative(CondV, DL, VT);
9786        return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9787      }
9788    }
9789  }
9790
9791  // select c, ~x, x --> xor -c, x
9792  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9793    const APInt &TrueVal = TrueV->getAsAPIntVal();
9794    const APInt &FalseVal = FalseV->getAsAPIntVal();
9795    if (~TrueVal == FalseVal) {
9796      SDValue Neg = DAG.getNegative(CondV, DL, VT);
9797      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9798    }
9799  }
9800
9801  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9802  // when both truev and falsev are also setcc.
9803  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9804      FalseV.getOpcode() == ISD::SETCC) {
9805    SDValue LHS = CondV.getOperand(0);
9806    SDValue RHS = CondV.getOperand(1);
9807    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9808
9809    // (select x, x, y) -> x | y
9810    // (select !x, x, y) -> x & y
9811    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9812      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9813                         DAG.getFreeze(FalseV));
9814    }
9815    // (select x, y, x) -> x & y
9816    // (select !x, y, x) -> x | y
9817    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9818      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9819                         DAG.getFreeze(TrueV), FalseV);
9820    }
9821  }
9822
  // No profitable expansion found.
9823  return SDValue();
9824}
9825
9826// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9827// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9828// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9829// being `0` or `-1`. In such cases we can replace `select` with `and`.
9830// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9831// than `c0`?
// (See header comment above.) One operand of BO is a single-use SELECT with
// one constant arm; fold BO into both arms when the constant arm folds to
// 0 or -1, letting the select become cheap bitwise logic later. (The
// signature's second line was elided in this extract.)
9832static SDValue
9834                                const RISCVSubtarget &Subtarget) {
  // Short-forward-branch cores prefer keeping the select; don't transform.
9835  if (Subtarget.hasShortForwardBranchIALU())
9836    return SDValue();
9837
  // Locate the single-use SELECT operand of BO (try operand 0, then 1).
9838  unsigned SelOpNo = 0;
9839  SDValue Sel = BO->getOperand(0);
9840  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9841    SelOpNo = 1;
9842    Sel = BO->getOperand(1);
9843  }
9844
9845  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9846    return SDValue();
9847
  // Identify which select arm is the constant and which is the variable one.
9848  unsigned ConstSelOpNo = 1;
9849  unsigned OtherSelOpNo = 2;
9850  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9851    ConstSelOpNo = 2;
9852    OtherSelOpNo = 1;
9853  }
9854  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9855  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9856  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9857    return SDValue();
9858
  // The other operand of BO must also be a (non-opaque) constant.
9859  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9860  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9861  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9862    return SDValue();
9863
9864  SDLoc DL(Sel);
9865  EVT VT = BO->getValueType(0);
9866
  // Constant-fold binOp(constArm, c1), preserving BO's operand order.
9867  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9868  if (SelOpNo == 1)
9869    std::swap(NewConstOps[0], NewConstOps[1]);
9870
9871  SDValue NewConstOp =
9872      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9873  if (!NewConstOp)
9874    return SDValue();
9875
  // Only profitable when the folded constant is 0 or -1 (select then lowers
  // to AND/OR logic).
9876  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9877  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9878    return SDValue();
9879
  // Apply BO to the variable arm, again preserving operand order.
9880  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9881  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9882  if (SelOpNo == 1)
9883    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9884  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9885
  // Rebuild the select with each arm replaced by binOp(arm, c1).
9886  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9887  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9888  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9889}
9890
9891SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9892 SDValue CondV = Op.getOperand(0);
9893 SDValue TrueV = Op.getOperand(1);
9894 SDValue FalseV = Op.getOperand(2);
9895 SDLoc DL(Op);
9896 MVT VT = Op.getSimpleValueType();
9897 MVT XLenVT = Subtarget.getXLenVT();
9898
9899 // Handle P extension packed types by bitcasting to XLenVT for selection,
9900 // e.g. select i1 %cond, <2 x i16> %TrueV, <2 x i16> %FalseV
9901 // These types fit in a single GPR so can use the same selection mechanism
9902 // as scalars.
9903 if (Subtarget.isPExtPackedType(VT)) {
9904 SDValue TrueVInt = DAG.getBitcast(XLenVT, TrueV);
9905 SDValue FalseVInt = DAG.getBitcast(XLenVT, FalseV);
9906 SDValue ResultInt =
9907 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9908 return DAG.getBitcast(VT, ResultInt);
9909 }
9910
9911 // Lower vector SELECTs to VSELECTs by splatting the condition.
9912 if (VT.isVector()) {
9913 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9914 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9915 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9916 }
9917
9918 // Try some other optimizations before falling back to generic lowering.
9919 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9920 return V;
9921
9922 // When there is no cost for GPR <-> FPR, we can use zicond select for
9923 // floating value when CondV is int type
9924 bool FPinGPR = Subtarget.hasStdExtZfinx();
9925
9926 // We can handle FGPR without spliting into hi/lo parts
9927 bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
9928 Subtarget.getXLenVT().getSizeInBits());
9929
9930 bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
9931 VT.isFloatingPoint() && FitsInGPR;
9932
9933 if (UseZicondForFPSel) {
9934
9935 auto CastToInt = [&](SDValue V) -> SDValue {
9936 // Treat +0.0 as int 0 to enable single 'czero' instruction generation.
9937 if (isNullFPConstant(V))
9938 return DAG.getConstant(0, DL, XLenVT);
9939
9940 if (VT == MVT::f16)
9941 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
9942
9943 if (VT == MVT::f32 && Subtarget.is64Bit())
9944 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
9945
9946 return DAG.getBitcast(XLenVT, V);
9947 };
9948
9949 SDValue TrueVInt = CastToInt(TrueV);
9950 SDValue FalseVInt = CastToInt(FalseV);
9951
9952 // Emit integer SELECT (lowers to Zicond)
9953 SDValue ResultInt =
9954 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9955
9956 // Convert back to floating VT
9957 if (VT == MVT::f32 && Subtarget.is64Bit())
9958 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
9959
9960 if (VT == MVT::f16)
9961 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
9962
9963 return DAG.getBitcast(VT, ResultInt);
9964 }
9965
9966 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9967 // nodes to implement the SELECT. Performing the lowering here allows for
9968 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9969 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9970 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9971
9972 // (select c, t, 0) -> (czero_eqz t, c)
9973 if (isNullConstant(FalseV))
9974 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9975 // (select c, 0, f) -> (czero_nez f, c)
9976 if (isNullConstant(TrueV))
9977 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9978
9979 // Check to see if a given operation is a 'NOT', if so return the negated
9980 // operand
9981 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9982 using namespace llvm::SDPatternMatch;
9983 SDValue Xor;
9984 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9985 return Xor;
9986 }
9987 return std::nullopt;
9988 };
9989 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9990 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9991 if (TrueV.getOpcode() == ISD::AND &&
9992 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9993 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9994 ? getNotOperand(TrueV.getOperand(1))
9995 : getNotOperand(TrueV.getOperand(0));
9996 if (NotOperand) {
9997 SDValue CMOV =
9998 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9999 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
10000 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
10001 }
10002 return DAG.getNode(
10003 ISD::OR, DL, VT, TrueV,
10004 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
10005 }
10006
10007 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
10008 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
10009 if (FalseV.getOpcode() == ISD::AND &&
10010 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
10011 auto NotOperand = (FalseV.getOperand(0) == TrueV)
10012 ? getNotOperand(FalseV.getOperand(1))
10013 : getNotOperand(FalseV.getOperand(0));
10014 if (NotOperand) {
10015 SDValue CMOV =
10016 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
10017 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
10018 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
10019 }
10020 return DAG.getNode(
10021 ISD::OR, DL, VT, FalseV,
10022 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
10023 }
10024
10025 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
10026 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
10027 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
10028 const APInt &TrueVal = TrueV->getAsAPIntVal();
10029 const APInt &FalseVal = FalseV->getAsAPIntVal();
10030
10031 // Prefer these over Zicond to avoid materializing an immediate:
10032 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
10033 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
10034 if (CondV.getOpcode() == ISD::SETCC &&
10035 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
10036 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10037 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
10038 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
10039 int64_t TrueImm = TrueVal.getSExtValue();
10040 int64_t FalseImm = FalseVal.getSExtValue();
10041 if (CCVal == ISD::SETGT)
10042 std::swap(TrueImm, FalseImm);
10043 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
10044 isInt<12>(TrueImm - FalseImm)) {
10045 SDValue SRA =
10046 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
10047 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
10048 SDValue AND =
10049 DAG.getNode(ISD::AND, DL, VT, SRA,
10050 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
10051 return DAG.getNode(ISD::ADD, DL, VT, AND,
10052 DAG.getSignedConstant(FalseImm, DL, VT));
10053 }
10054 }
10055 }
10056
10057 // Use SHL/ADDI (and possible XORI) to avoid having to materialize
10058 // a constant in register
10059 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
10060 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
10061 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
10062 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
10063 }
10064 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
10065 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
10066 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
10067 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
10068 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
10069 }
10070
10071 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
10072 const int DeltaCost = RISCVMatInt::getIntMatCost(
10073 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
10074 // Does the addend fold into an ADDI
10075 if (Addend.isSignedIntN(12))
10076 return DeltaCost;
10077 const int AddendCost = RISCVMatInt::getIntMatCost(
10078 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
10079 return AddendCost + DeltaCost;
10080 };
10081 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
10082 getCost(TrueVal - FalseVal, FalseVal);
10083 SDValue LHSVal = DAG.getConstant(
10084 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
10085 SDValue CMOV =
10086 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
10087 DL, VT, LHSVal, CondV);
10088 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
10089 }
10090
10091 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
10092 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
10093 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
10094 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
10095 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
10096 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
10097 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
10098 // Efficient only if the constant and its negation fit into `ADDI`
10099 // Prefer Add/Sub over Xor since can be compressed for small immediates
10100 if (isInt<12>(RawConstVal)) {
10101 // Fall back to XORI if Const == -0x800 since we don't have SUBI.
10102 unsigned SubOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::SUB;
10103 unsigned AddOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::ADD;
10104 SDValue SubOp = DAG.getNode(SubOpc, DL, VT, RegV, ConstVal);
10105 SDValue CZERO =
10106 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
10107 DL, VT, SubOp, CondV);
10108 return DAG.getNode(AddOpc, DL, VT, CZERO, ConstVal);
10109 }
10110 }
10111
10112 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
10113 // Unless we have the short forward branch optimization.
10114 if (!Subtarget.hasConditionalMoveFusion())
10115 return DAG.getNode(
10116 ISD::OR, DL, VT,
10117 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
10118 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
10120 }
10121
10122 if (Op.hasOneUse()) {
10123 unsigned UseOpc = Op->user_begin()->getOpcode();
10124 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
10125 SDNode *BinOp = *Op->user_begin();
10126 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
10127 DAG, Subtarget)) {
10128 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
10129 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
10130 // may return a constant node and cause crash in lowerSELECT.
10131 if (NewSel.getOpcode() == ISD::SELECT)
10132 return lowerSELECT(NewSel, DAG);
10133 return NewSel;
10134 }
10135 }
10136 }
10137
10138 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
10139 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
10140 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
10141 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
10142 if (FPTV && FPFV) {
10143 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
10144 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
10145 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
10146 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
10147 DAG.getConstant(1, DL, XLenVT));
10148 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
10149 }
10150 }
10151
10152 // If the condition is not an integer SETCC which operates on XLenVT, we need
10153 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
10154 // (select condv, truev, falsev)
10155 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
10156 if (CondV.getOpcode() != ISD::SETCC ||
10157 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
10158 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10159 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
10160
10161 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
10162
10163 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
10164 }
10165
10166 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
10167 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
10168 // advantage of the integer compare+branch instructions. i.e.:
10169 // (select (setcc lhs, rhs, cc), truev, falsev)
10170 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
10171 SDValue LHS = CondV.getOperand(0);
10172 SDValue RHS = CondV.getOperand(1);
10173 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10174
10175 // Special case for a select of 2 constants that have a difference of 1.
10176 // Normally this is done by DAGCombine, but if the select is introduced by
10177 // type legalization or op legalization, we miss it. Restricting to SETLT
10178 // case for now because that is what signed saturating add/sub need.
10179 // FIXME: We don't need the condition to be SETLT or even a SETCC,
10180 // but we would probably want to swap the true/false values if the condition
10181 // is SETGE/SETLE to avoid an XORI.
10182 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
10183 CCVal == ISD::SETLT) {
10184 const APInt &TrueVal = TrueV->getAsAPIntVal();
10185 const APInt &FalseVal = FalseV->getAsAPIntVal();
10186 if (TrueVal - 1 == FalseVal)
10187 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
10188 if (TrueVal + 1 == FalseVal)
10189 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
10190 }
10191
10192 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10193 // 1 < x ? x : 1 -> 0 < x ? x : 1
10194 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
10195 RHS == TrueV && LHS == FalseV) {
10196 LHS = DAG.getConstant(0, DL, VT);
10197 // 0 <u x is the same as x != 0.
10198 if (CCVal == ISD::SETULT) {
10199 std::swap(LHS, RHS);
10200 CCVal = ISD::SETNE;
10201 }
10202 }
10203
10204 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
10205 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
10206 RHS == FalseV) {
10207 RHS = DAG.getConstant(0, DL, VT);
10208 }
10209
10210 SDValue TargetCC = DAG.getCondCode(CCVal);
10211
10212 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
10213 // (select (setcc lhs, rhs, CC), constant, falsev)
10214 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
10215 std::swap(TrueV, FalseV);
10216 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
10217 }
10218
10219 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
10220 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
10221}
10222
10223SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
10224 SDValue CondV = Op.getOperand(1);
10225 SDLoc DL(Op);
10226 MVT XLenVT = Subtarget.getXLenVT();
10227
10228 if (CondV.getOpcode() == ISD::SETCC &&
10229 CondV.getOperand(0).getValueType() == XLenVT) {
10230 SDValue LHS = CondV.getOperand(0);
10231 SDValue RHS = CondV.getOperand(1);
10232 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10233
10234 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10235
10236 SDValue TargetCC = DAG.getCondCode(CCVal);
10237 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10238 LHS, RHS, TargetCC, Op.getOperand(2));
10239 }
10240
10241 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10242 CondV, DAG.getConstant(0, DL, XLenVT),
10243 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
10244}
10245
10246SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
10247 MachineFunction &MF = DAG.getMachineFunction();
10248 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
10249
10250 SDLoc DL(Op);
10251 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
10253
10254 // vastart just stores the address of the VarArgsFrameIndex slot into the
10255 // memory location argument.
10256 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10257 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
10258 MachinePointerInfo(SV));
10259}
10260
10261SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
10262 SelectionDAG &DAG) const {
10263 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10264 MachineFunction &MF = DAG.getMachineFunction();
10265 MachineFrameInfo &MFI = MF.getFrameInfo();
10266 MFI.setFrameAddressIsTaken(true);
10267 Register FrameReg = RI.getFrameRegister(MF);
10268 int XLenInBytes = Subtarget.getXLen() / 8;
10269
10270 EVT VT = Op.getValueType();
10271 SDLoc DL(Op);
10272 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
10273 unsigned Depth = Op.getConstantOperandVal(0);
10274 while (Depth--) {
10275 int Offset = -(XLenInBytes * 2);
10276 SDValue Ptr = DAG.getNode(
10277 ISD::ADD, DL, VT, FrameAddr,
10279 FrameAddr =
10280 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
10281 }
10282 return FrameAddr;
10283}
10284
10285SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
10286 SelectionDAG &DAG) const {
10287 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10288 MachineFunction &MF = DAG.getMachineFunction();
10289 MachineFrameInfo &MFI = MF.getFrameInfo();
10290 MFI.setReturnAddressIsTaken(true);
10291 MVT XLenVT = Subtarget.getXLenVT();
10292 int XLenInBytes = Subtarget.getXLen() / 8;
10293
10294 EVT VT = Op.getValueType();
10295 SDLoc DL(Op);
10296 unsigned Depth = Op.getConstantOperandVal(0);
10297 if (Depth) {
10298 int Off = -XLenInBytes;
10299 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
10300 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
10301 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
10302 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
10303 MachinePointerInfo());
10304 }
10305
10306 // Return the value of the return address register, marking it an implicit
10307 // live-in.
10308 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
10309 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
10310}
10311
// Lower SHL_PARTS: shift the 2*XLEN-wide value {Hi, Lo} left by Shamt and
// return both result halves via getMergeValues ({Lo, Hi} order). Shamt may be
// anything in [0, 2*XLEN); both the P-extension and generic paths handle the
// Shamt >= XLEN case explicitly.
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();
  unsigned XLen = Subtarget.getXLen();

  // With P extension, use SLX (FSHL) for the high part.
  if (Subtarget.hasStdExtP()) {
    // HiRes = fshl(Hi, Lo, Shamt) - correct when Shamt < XLen
    SDValue HiRes = DAG.getNode(ISD::FSHL, DL, VT, Hi, Lo, Shamt);
    // LoRes = Lo << Shamt - correct Lo when Shamt < XLen,
    // Mask shift amount to avoid UB when Shamt >= XLen.
    SDValue ShamtMasked =
        DAG.getNode(ISD::AND, DL, VT, Shamt, DAG.getConstant(XLen - 1, DL, VT));
    SDValue LoRes = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMasked);

    // Create a mask that is -1 when Shamt >= XLen, 0 otherwise.
    // FIXME: We should use a select and let LowerSelect make the
    // optimizations.
    // The bit of Shamt that distinguishes Shamt >= XLen is moved into the
    // sign position and then smeared across the whole word.
    SDValue ShAmtExt =
        DAG.getNode(ISD::SHL, DL, VT, Shamt,
                    DAG.getConstant(XLen - Log2_32(XLen) - 1, DL, VT));
    SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, ShAmtExt,
                               DAG.getConstant(XLen - 1, DL, VT));

    // When Shamt >= XLen: HiRes = LoRes, LoRes = 0
    // HiRes = (HiRes & ~Mask) | (LoRes & Mask)
    SDValue HiMasked =
        DAG.getNode(ISD::AND, DL, VT, HiRes, DAG.getNOT(DL, Mask, VT));
    SDValue LoMasked = DAG.getNode(ISD::AND, DL, VT, LoRes, Mask);
    // The two masked halves cannot have overlapping set bits, so mark the OR
    // as disjoint.
    HiRes =
        DAG.getNode(ISD::OR, DL, VT, HiMasked, LoMasked, SDNodeFlags::Disjoint);

    // LoRes = LoRes & ~Mask (clear when Shamt >= XLen)
    LoRes = DAG.getNode(ISD::AND, DL, VT, LoRes, DAG.getNOT(DL, Mask, VT));

    return DAG.getMergeValues({LoRes, HiRes}, DL);
  }

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getSignedConstant(-(int)XLen, DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(XLen - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  // The (>>u 1) >>u (XLEN-1 - Shamt) split keeps each individual shift amount
  // in [0, XLEN) even when Shamt == 0.
  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
10384
10385SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
10386 bool IsSRA) const {
10387 SDLoc DL(Op);
10388 SDValue Lo = Op.getOperand(0);
10389 SDValue Hi = Op.getOperand(1);
10390 SDValue Shamt = Op.getOperand(2);
10391 EVT VT = Lo.getValueType();
10392
10393 // With P extension, use NSRL/NSRA for RV32 or FSHR (SRX) for RV64.
10394 if (Subtarget.hasStdExtP()) {
10395 unsigned XLen = Subtarget.getXLen();
10396
10397 SDValue LoRes;
10398 if (Subtarget.is64Bit()) {
10399 // On RV64, use FSHR (SRX instruction) for the low part. We will need
10400 // to fix this later if ShAmt >= 64.
10401 LoRes = DAG.getNode(ISD::FSHR, DL, VT, Hi, Lo, Shamt);
10402 } else {
10403 // On RV32, use NSRL/NSRA for the low part.
10404 // NSRL/NSRA read 6 bits of shift amount, so they handle Shamt >= 32
10405 // correctly.
10406 LoRes = DAG.getNode(IsSRA ? RISCVISD::NSRA : RISCVISD::NSRL, DL, VT, Lo,
10407 Hi, Shamt);
10408 }
10409
10410 // Mask shift amount to avoid UB when Shamt >= XLen.
10411 SDValue ShamtMasked =
10412 DAG.getNode(ISD::AND, DL, VT, Shamt, DAG.getConstant(XLen - 1, DL, VT));
10413 SDValue HiRes =
10414 DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, ShamtMasked);
10415
10416 // Create a mask that is -1 when Shamt >= XLen, 0 otherwise.
10417 // FIXME: We should use a select and let LowerSelect make the
10418 // optimizations.
10419 SDValue ShAmtExt =
10420 DAG.getNode(ISD::SHL, DL, VT, Shamt,
10421 DAG.getConstant(XLen - Log2_32(XLen) - 1, DL, VT));
10422 SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, ShAmtExt,
10423 DAG.getConstant(XLen - 1, DL, VT));
10424
10425 if (Subtarget.is64Bit()) {
10426 // On RV64, FSHR masks shift amount to 63. We need to replace LoRes
10427 // with HiRes when Shamt >= 64.
10428 // LoRes = (LoRes & ~Mask) | (HiRes & Mask)
10429 SDValue LoMasked =
10430 DAG.getNode(ISD::AND, DL, VT, LoRes, DAG.getNOT(DL, Mask, VT));
10431 SDValue HiMasked = DAG.getNode(ISD::AND, DL, VT, HiRes, Mask);
10432 LoRes = DAG.getNode(ISD::OR, DL, VT, LoMasked, HiMasked,
10434 }
10435
10436 // If ShAmt >= XLen, we need to replace HiRes with 0 or sign bits.
10437 if (IsSRA) {
10438 // sra hi, hi, (mask & (XLen-1)) - shifts by XLen-1 when shamt >= XLen
10439 SDValue MaskAmt = DAG.getNode(ISD::AND, DL, VT, Mask,
10440 DAG.getConstant(XLen - 1, DL, VT));
10441 HiRes = DAG.getNode(ISD::SRA, DL, VT, HiRes, MaskAmt);
10442 } else {
10443 // andn hi, hi, mask - clears hi when shamt >= XLen
10444 HiRes = DAG.getNode(ISD::AND, DL, VT, HiRes, DAG.getNOT(DL, Mask, VT));
10445 }
10446
10447 return DAG.getMergeValues({LoRes, HiRes}, DL);
10448 }
10449
10450 // SRA expansion:
10451 // if Shamt-XLEN < 0: // Shamt < XLEN
10452 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10453 // Hi = Hi >>s Shamt
10454 // else:
10455 // Lo = Hi >>s (Shamt-XLEN);
10456 // Hi = Hi >>s (XLEN-1)
10457 //
10458 // SRL expansion:
10459 // if Shamt-XLEN < 0: // Shamt < XLEN
10460 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10461 // Hi = Hi >>u Shamt
10462 // else:
10463 // Lo = Hi >>u (Shamt-XLEN);
10464 // Hi = 0;
10465
10466 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
10467
10468 SDValue Zero = DAG.getConstant(0, DL, VT);
10469 SDValue One = DAG.getConstant(1, DL, VT);
10470 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10471 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10472 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10473 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10474
10475 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
10476 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
10477 SDValue ShiftLeftHi =
10478 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
10479 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
10480 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
10481 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
10482 SDValue HiFalse =
10483 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
10484
10485 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10486
10487 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
10488 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10489
10490 SDValue Parts[2] = {Lo, Hi};
10491 return DAG.getMergeValues(Parts, DL);
10492}
10493
10494// Lower splats of i1 types to SETCC. For each mask vector type, we have a
10495// legal equivalently-sized i8 type, so we can use that as a go-between.
10496SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
10497 SelectionDAG &DAG) const {
10498 SDLoc DL(Op);
10499 MVT VT = Op.getSimpleValueType();
10500 SDValue SplatVal = Op.getOperand(0);
10501 // All-zeros or all-ones splats are handled specially.
10502 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
10503 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10504 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
10505 }
10506 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
10507 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10508 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
10509 }
10510 MVT InterVT = VT.changeVectorElementType(MVT::i8);
10511 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
10512 DAG.getConstant(1, DL, SplatVal.getValueType()));
10513 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
10514 SDValue Zero = DAG.getConstant(0, DL, InterVT);
10515 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
10516}
10517
10518// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
10519// illegal (currently only vXi64 RV32).
10520// FIXME: We could also catch non-constant sign-extended i32 values and lower
10521// them to VMV_V_X_VL.
10522SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
10523 SelectionDAG &DAG) const {
10524 SDLoc DL(Op);
10525 MVT VecVT = Op.getSimpleValueType();
10526 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
10527 "Unexpected SPLAT_VECTOR_PARTS lowering");
10528
10529 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
10530 SDValue Lo = Op.getOperand(0);
10531 SDValue Hi = Op.getOperand(1);
10532
10533 MVT ContainerVT = VecVT;
10534 if (VecVT.isFixedLengthVector())
10535 ContainerVT = getContainerForFixedLengthVector(VecVT);
10536
10537 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10538
10539 SDValue Res =
10540 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
10541
10542 if (VecVT.isFixedLengthVector())
10543 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
10544
10545 return Res;
10546}
10547
10548// Custom-lower extensions from mask vectors by using a vselect either with 1
10549// for zero/any-extension or -1 for sign-extension:
10550// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
10551// Note that any-extension is lowered identically to zero-extension.
10552SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
10553 int64_t ExtTrueVal) const {
10554 SDLoc DL(Op);
10555 MVT VecVT = Op.getSimpleValueType();
10556 SDValue Src = Op.getOperand(0);
10557 // Only custom-lower extensions from mask types
10558 assert(Src.getValueType().isVector() &&
10559 Src.getValueType().getVectorElementType() == MVT::i1);
10560
10561 if (VecVT.isScalableVector()) {
10562 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
10563 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
10564 if (Src.getOpcode() == ISD::XOR &&
10565 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
10566 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
10567 SplatTrueVal);
10568 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
10569 }
10570
10571 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
10572 MVT I1ContainerVT =
10573 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10574
10575 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
10576
10577 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10578
10579 MVT XLenVT = Subtarget.getXLenVT();
10580 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10581 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
10582
10583 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10584 SDValue Xor = Src.getOperand(0);
10585 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
10586 SDValue ScalableOnes = Xor.getOperand(1);
10587 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
10588 ScalableOnes.getOperand(0).isUndef() &&
10590 ScalableOnes.getOperand(1).getNode())) {
10591 CC = Xor.getOperand(0);
10592 std::swap(SplatZero, SplatTrueVal);
10593 }
10594 }
10595 }
10596
10597 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10598 DAG.getUNDEF(ContainerVT), SplatZero, VL);
10599 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10600 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
10601 SDValue Select =
10602 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
10603 SplatZero, DAG.getUNDEF(ContainerVT), VL);
10604
10605 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
10606}
10607
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
// Handles both plain TRUNCATE and VP_TRUNCATE (which carries explicit mask
// and VL operands).
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  // VP_TRUNCATE supplies its own mask and VL operands.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP truncates use the default all-ones mask and full VL.
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  // (and vec, 1) != 0 yields the low bit of each element as an i1 vector.
  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
10662
10663SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
10664 SelectionDAG &DAG) const {
10665 unsigned Opc = Op.getOpcode();
10666 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
10667 SDLoc DL(Op);
10668
10669 MVT VT = Op.getSimpleValueType();
10670 // Only custom-lower vector truncates
10671 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10672
10673 // Truncates to mask types are handled differently
10674 if (VT.getVectorElementType() == MVT::i1)
10675 return lowerVectorMaskTruncLike(Op, DAG);
10676
10677 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
10678 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
10679 // truncate by one power of two at a time.
10680 MVT DstEltVT = VT.getVectorElementType();
10681
10682 SDValue Src = Op.getOperand(0);
10683 MVT SrcVT = Src.getSimpleValueType();
10684 MVT SrcEltVT = SrcVT.getVectorElementType();
10685
10686 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
10687 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
10688 "Unexpected vector truncate lowering");
10689
10690 MVT ContainerVT = SrcVT;
10691 SDValue Mask, VL;
10692 if (IsVPTrunc) {
10693 Mask = Op.getOperand(1);
10694 VL = Op.getOperand(2);
10695 }
10696 if (SrcVT.isFixedLengthVector()) {
10697 ContainerVT = getContainerForFixedLengthVector(SrcVT);
10698 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10699 if (IsVPTrunc) {
10700 MVT MaskVT = getMaskTypeFor(ContainerVT);
10701 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10702 }
10703 }
10704
10705 SDValue Result = Src;
10706 if (!IsVPTrunc) {
10707 std::tie(Mask, VL) =
10708 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10709 }
10710
10711 unsigned NewOpc;
10713 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
10714 else if (Opc == ISD::TRUNCATE_USAT_U)
10715 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
10716 else
10717 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
10718
10719 do {
10720 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
10721 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
10722 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
10723 } while (SrcEltVT != DstEltVT);
10724
10725 if (SrcVT.isFixedLengthVector())
10726 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10727
10728 return Result;
10729}
10730
// Lower STRICT_FP_EXTEND / STRICT_FP_ROUND on vectors. RVV converts only
// between adjacent FP widths, so f64 <-> f16/bf16 goes through an f32
// intermediate; for narrowing, the intermediate uses round-to-odd to avoid
// double rounding. The chain is threaded through every step to preserve the
// strict-FP ordering.
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Operand 0 is the chain for strict-FP nodes.
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       (SrcVT.getVectorElementType() == MVT::f16 ||
        SrcVT.getVectorElementType() == MVT::bf16)) ||
      ((VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16) &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
10779
// Lower FP_EXTEND / FP_ROUND / VP_FP_EXTEND / VP_FP_ROUND on vectors. Direct
// conversions (one width step) may be left alone for scalable non-VP nodes or
// emitted as a single *_VL node; f64 <-> f16/bf16 takes two steps through an
// f32 intermediate (round-to-odd when narrowing).
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  // A conversion is "direct" when it is NOT one of the two-step cases
  // (f16/bf16 -> f64 extend, or f64 -> f16/bf16 round).
  bool IsDirectExtend =
      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                   (SrcVT.getVectorElementType() != MVT::f16 &&
                    SrcVT.getVectorElementType() != MVT::bf16));
  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
                                      VT.getVectorElementType() != MVT::bf16) ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // We have regular SD node patterns for direct non-VL extends.
  if (VT.isScalableVector() && IsDirectConv && !IsVP)
    return Op;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP nodes use the default all-ones mask and full VL.
  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  // Two-step conversion through f32; narrowing uses round-to-odd for the
  // intermediate to avoid double rounding.
  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
10855
10856// Given a scalable vector type and an index into it, returns the type for the
10857// smallest subvector that the index fits in. This can be used to reduce LMUL
10858// for operations like vslidedown.
10859//
10860// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10861static std::optional<MVT>
10862getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10863 const RISCVSubtarget &Subtarget) {
10864 assert(VecVT.isScalableVector());
10865 const unsigned EltSize = VecVT.getScalarSizeInBits();
10866 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10867 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10868 MVT SmallerVT;
10869 if (MaxIdx < MinVLMAX)
10870 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10871 else if (MaxIdx < MinVLMAX * 2)
10872 SmallerVT =
10874 else if (MaxIdx < MinVLMAX * 4)
10875 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10878 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10879 return std::nullopt;
10880 return SmallerVT;
10881}
10882
10884 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10885 if (!IdxC || isNullConstant(Idx))
10886 return false;
10887 return isUInt<5>(IdxC->getZExtValue());
10888}
10889
10890// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10891// first position of a vector, and that vector is slid up to the insert index.
10892// By limiting the active vector length to index+1 and merging with the
10893// original vector (with an undisturbed tail policy for elements >= VL), we
10894// achieve the desired result of leaving all elements untouched except the one
10895// at VL-1, which is replaced with the desired value.
10896SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10897 SelectionDAG &DAG) const {
10898 SDLoc DL(Op);
10899 MVT VecVT = Op.getSimpleValueType();
10900 MVT XLenVT = Subtarget.getXLenVT();
10901 SDValue Vec = Op.getOperand(0);
10902 SDValue Val = Op.getOperand(1);
10903 MVT ValVT = Val.getSimpleValueType();
10904 SDValue Idx = Op.getOperand(2);
10905
// i1 vectors: widen to i8, insert there, and truncate back to the mask type.
10906 if (VecVT.getVectorElementType() == MVT::i1) {
10907 // FIXME: For now we just promote to an i8 vector and insert into that,
10908 // but this is probably not optimal.
10909 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10910 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10911 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10912 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10913 }
10914
// f16/bf16 without native vector support for that element type: move the
// scalar to a GPR and perform the insert on the integer-typed vector.
10915 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10916 (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10917 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10918 MVT IntVT = VecVT.changeTypeToInteger();
10919 SDValue IntInsert = DAG.getNode(
10920 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10921 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10922 return DAG.getBitcast(VecVT, IntInsert);
10923 }
10924
// Packed-SIMD (P extension) path: the small fixed-length vector is held in a
// single GPR, so the insert is done with scalar shift/merge operations.
// Requires a constant index; returning SDValue() falls back to the default
// lowering below / generic expansion.
10925 if (Subtarget.hasStdExtP() && VecVT.isFixedLengthVector()) {
10926 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10927 if (!IdxC)
10928 return SDValue();
10929
10930 unsigned IdxVal = IdxC->getZExtValue();
10931 unsigned NumElts = VecVT.getVectorNumElements();
10932 MVT EltVT = VecVT.getVectorElementType();
10933 Vec = DAG.getBitcast(XLenVT, Vec);
10934 SDValue ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10935
10936 // For 2-element vectors, BUILD_VECTOR is more efficient since it only needs
10937 // at most 2 instructions.
10938 if (NumElts == 2) {
10939 unsigned EltBits = EltVT.getSizeInBits();
10940 SDValue Elt0, Elt1;
10941 if (IdxVal == 0) {
10942 Elt0 = ExtVal;
10943 Elt1 = DAG.getNode(ISD::SRL, DL, XLenVT, Vec,
10944 DAG.getConstant(EltBits, DL, XLenVT));
10945 } else {
10946 Elt0 = Vec;
10947 Elt1 = ExtVal;
10948 }
10949 return DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Elt0, Elt1);
10950 }
10951
10952 // For 4/8-element vectors, use MVM(or MERGE) instruction which does bitwise
10953 // select: rd = (~mask & rd) | (mask & rs1).
10954 // This generates: slli + lui/li + mvm
10955 if (NumElts == 4 || NumElts == 8) {
10956 unsigned EltBits = EltVT.getSizeInBits();
10957 unsigned ShiftAmt = IdxVal * EltBits;
// PosMask has ones exactly over the destination element's bit positions.
10958 uint64_t PosMask = ((1ULL << EltBits) - 1) << ShiftAmt;
10959
10960 SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, XLenVT, ExtVal,
10961 DAG.getConstant(ShiftAmt, DL, XLenVT));
10962 SDValue Mask = DAG.getConstant(PosMask, DL, XLenVT);
10963 SDValue Result =
10964 DAG.getNode(RISCVISD::MERGE, DL, XLenVT, Mask, Vec, ShiftedVal);
10965 return DAG.getBitcast(VecVT, Result);
10966 }
10967
10968 return SDValue();
10969 }
10970
10971 MVT ContainerVT = VecVT;
10972 // If the operand is a fixed-length vector, convert to a scalable one.
10973 if (VecVT.isFixedLengthVector()) {
10974 ContainerVT = getContainerForFixedLengthVector(VecVT);
10975 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10976 }
10977
10978 // If we know the index we're going to insert at, we can shrink Vec so that
10979 // we're performing the scalar inserts and slideup on a smaller LMUL.
// AlignedIdx, when set, is the element offset of the shrunken subvector
// within OrigVec; the result is re-inserted there before returning.
10980 SDValue OrigVec = Vec;
10981 std::optional<unsigned> AlignedIdx;
10982 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10983 const unsigned OrigIdx = IdxC->getZExtValue();
10984 // Do we know an upper bound on LMUL?
10985 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10986 DL, DAG, Subtarget)) {
10987 ContainerVT = *ShrunkVT;
10988 AlignedIdx = 0;
10989 }
10990
10991 // If we're compiling for an exact VLEN value, we can always perform
10992 // the insert in m1 as we can determine the register corresponding to
10993 // the index in the register group.
10994 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10995 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10996 EVT ElemVT = VecVT.getVectorElementType();
10997 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10998 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10999 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
11000 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
11001 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
11002 ContainerVT = M1VT;
11003 }
11004
11005 if (AlignedIdx)
11006 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
11007 }
11008
11009 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
11010 // Even i64-element vectors on RV32 can be lowered without scalar
11011 // legalization if the most-significant 32 bits of the value are not affected
11012 // by the sign-extension of the lower 32 bits. This applies to i32 constants
11013 // and sign_extend of i32 values.
11014 if (!IsLegalInsert) {
11015 if (isa<ConstantSDNode>(Val)) {
11016 const auto *CVal = cast<ConstantSDNode>(Val);
11017 if (isInt<32>(CVal->getSExtValue())) {
11018 IsLegalInsert = true;
11019 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
11020 }
11021 } else if (Val.getOpcode() == ISD::SIGN_EXTEND &&
11022 Val.getOperand(0).getValueType() == MVT::i32) {
11023 IsLegalInsert = true;
11024 Val = Val.getOperand(0);
11025 }
11026 }
11027
11028 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11029
11030 SDValue ValInVec;
11031
11032 if (IsLegalInsert) {
11033 unsigned Opc =
11034 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
// Index 0 needs no slideup: vmv.s.x/vfmv.s.f writes element 0 directly.
11035 if (isNullConstant(Idx)) {
11036 if (!VecVT.isFloatingPoint())
11037 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
11038 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
11039
11040 if (AlignedIdx)
11041 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
11042 if (!VecVT.isFixedLengthVector())
11043 return Vec;
11044 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
11045 }
11046
11047 // Use ri.vinsert.v.x if available.
// NOTE(review): the condition tail (line 11049) and the policy-operand
// expression (line 11052) were elided in extraction - they gate on a
// small constant index and select the tail policy; confirm upstream.
11048 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
11050 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
11051 SDValue PolicyOp =
11053 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
11054 VL, PolicyOp);
11055 if (AlignedIdx)
11056 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
11057 if (!VecVT.isFixedLengthVector())
11058 return Vec;
11059 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
11060 }
11061
11062 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
11063 } else {
11064 // On RV32, i64-element vectors must be specially handled to place the
11065 // value at element 0, by using two vslide1down instructions in sequence on
11066 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
11067 // this.
11068 SDValue ValLo, ValHi;
11069 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
11070 MVT I32ContainerVT =
11071 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
11072 SDValue I32Mask =
11073 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
11074 // Limit the active VL to two.
11075 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
11076 // If the Idx is 0 we can insert directly into the vector.
11077 if (isNullConstant(Idx)) {
11078 // First slide in the lo value, then the hi in above it. We use slide1down
11079 // to avoid the register group overlap constraint of vslide1up.
11080 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11081 Vec, Vec, ValLo, I32Mask, InsertI64VL);
11082 // If the source vector is undef don't pass along the tail elements from
11083 // the previous slide1down.
11084 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
11085 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11086 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
11087 // Bitcast back to the right container type.
11088 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
11089
11090 if (AlignedIdx)
11091 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
11092 if (!VecVT.isFixedLengthVector())
11093 return ValInVec;
11094 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
11095 }
11096
11097 // First slide in the lo value, then the hi in above it. We use slide1down
11098 // to avoid the register group overlap constraint of vslide1up.
11099 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11100 DAG.getUNDEF(I32ContainerVT),
11101 DAG.getUNDEF(I32ContainerVT), ValLo,
11102 I32Mask, InsertI64VL);
11103 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11104 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
11105 I32Mask, InsertI64VL);
11106 // Bitcast back to the right container type.
11107 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
11108 }
11109
11110 // Now that the value is in a vector, slide it into position.
// VL = Idx + 1 so exactly the elements up to and including the target are
// active; elements at and beyond VL keep the original vector's contents.
11111 SDValue InsertVL =
11112 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
11113
11114 // Use tail agnostic policy if Idx is the last index of Vec.
// NOTE(review): lines 11115/11118 defining `Policy` (tail-undisturbed by
// default, upgraded to tail-agnostic when inserting at the last element)
// were elided in extraction - confirm upstream.
11116 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
11117 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
11119 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
11120 Idx, Mask, InsertVL, Policy);
11121
11122 if (AlignedIdx)
11123 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
11124 if (!VecVT.isFixedLengthVector())
11125 return Slideup;
11126 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
11127}
11128
11129// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
11130// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
11131// types this is done using VMV_X_S to allow us to glean information about the
11132// sign bits of the result.
11133SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
11134 SelectionDAG &DAG) const {
11135 SDLoc DL(Op);
11136 SDValue Idx = Op.getOperand(1);
11137 SDValue Vec = Op.getOperand(0);
11138 EVT EltVT = Op.getValueType();
11139 MVT VecVT = Vec.getSimpleValueType();
11140 MVT XLenVT = Subtarget.getXLenVT();
11141
11142 if (VecVT.getVectorElementType() == MVT::i1) {
11143 // Use vfirst.m to extract the first bit.
// vfirst returns the index of the first set bit, or -1 if none; so element
// 0 is set iff vfirst returns exactly 0.
11144 if (isNullConstant(Idx)) {
11145 MVT ContainerVT = VecVT;
11146 if (VecVT.isFixedLengthVector()) {
11147 ContainerVT = getContainerForFixedLengthVector(VecVT);
11148 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11149 }
11150 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11151 SDValue Vfirst =
11152 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
11153 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
11154 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11155 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
11156 }
// For fixed i1 vectors of >= 8 elements, bitcast the mask to a vector of
// wide integer elements, extract the containing element into a GPR, then
// isolate the requested bit with a shift+and.
11157 if (VecVT.isFixedLengthVector()) {
11158 unsigned NumElts = VecVT.getVectorNumElements();
11159 if (NumElts >= 8) {
11160 MVT WideEltVT;
11161 unsigned WidenVecLen;
11162 SDValue ExtractElementIdx;
11163 SDValue ExtractBitIdx;
11164 unsigned MaxEEW = Subtarget.getELen();
11165 MVT LargestEltVT = MVT::getIntegerVT(
11166 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
// If the whole mask fits in one wide element, extract element 0 and use
// the original index as the bit index.
11167 if (NumElts <= LargestEltVT.getSizeInBits()) {
11168 assert(isPowerOf2_32(NumElts) &&
11169 "the number of elements should be power of 2");
11170 WideEltVT = MVT::getIntegerVT(NumElts);
11171 WidenVecLen = 1;
11172 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
11173 ExtractBitIdx = Idx;
11174 } else {
11175 WideEltVT = LargestEltVT;
11176 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
11177 // extract element index = index / element width
11178 ExtractElementIdx = DAG.getNode(
11179 ISD::SRL, DL, XLenVT, Idx,
11180 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
11181 // mask bit index = index % element width
11182 ExtractBitIdx = DAG.getNode(
11183 ISD::AND, DL, XLenVT, Idx,
11184 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
11185 }
11186 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
11187 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
11188 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
11189 Vec, ExtractElementIdx);
11190 // Extract the bit from GPR.
11191 SDValue ShiftRight =
11192 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
11193 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
11194 DAG.getConstant(1, DL, XLenVT));
11195 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
11196 }
11197 }
11198 // Otherwise, promote to an i8 vector and extract from that.
11199 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11200 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
11201 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
11202 }
11203
// f16/bf16 without native vector support for that element type: extract via
// the same-width integer vector, then move the bits back with fmv.h.x.
11204 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
11205 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
11206 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
11207 MVT IntVT = VecVT.changeTypeToInteger();
11208 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
11209 SDValue IntExtract =
11210 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
11211 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
11212 }
11213
// Packed-SIMD (P extension) path: these fixed vector types fit in a single
// GPR; bitcast and shift the selected element down into the low bits.
11214 if (Subtarget.hasStdExtP() && VecVT.isFixedLengthVector()) {
11215 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
11216 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
11217 return SDValue();
11218 SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
11219 unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
11220 SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
11221 DAG.getConstant(ElemWidth, DL, XLenVT));
11222 return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
11223 }
11224
11225 // If this is a fixed vector, we need to convert it to a scalable vector.
11226 MVT ContainerVT = VecVT;
11227 if (VecVT.isFixedLengthVector()) {
11228 ContainerVT = getContainerForFixedLengthVector(VecVT);
11229 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11230 }
11231
11232 // If we're compiling for an exact VLEN value and we have a known
11233 // constant index, we can always perform the extract in m1 (or
11234 // smaller) as we can determine the register corresponding to
11235 // the index in the register group.
11236 const auto VLen = Subtarget.getRealVLen();
11237 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
11238 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
11239 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
11240 unsigned OrigIdx = IdxC->getZExtValue();
11241 EVT ElemVT = VecVT.getVectorElementType();
11242 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
11243 unsigned RemIdx = OrigIdx % ElemsPerVReg;
11244 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
11245 unsigned ExtractIdx =
11246 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
11247 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
11248 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
11249 ContainerVT = M1VT;
11250 }
11251
11252 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
11253 // contains our index.
11254 std::optional<uint64_t> MaxIdx;
11255 if (VecVT.isFixedLengthVector())
11256 MaxIdx = VecVT.getVectorNumElements() - 1;
11257 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
11258 MaxIdx = IdxC->getZExtValue();
11259 if (MaxIdx) {
11260 if (auto SmallerVT =
11261 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
11262 ContainerVT = *SmallerVT;
11263 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11264 }
11265 }
11266
11267 // Use ri.vextract.x.v if available.
11268 // TODO: Avoid index 0 and just use the vmv.x.s
// NOTE(review): the condition tail (line 11270) was elided in extraction -
// it appears to gate on a valid constant index; confirm upstream.
11269 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
11271 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
11272 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
11273 }
11274
11275 // If after narrowing, the required slide is still greater than LMUL2,
11276 // fallback to generic expansion and go through the stack. This is done
11277 // for a subtle reason: extracting *all* elements out of a vector is
11278 // widely expected to be linear in vector size, but because vslidedown
11279 // is linear in LMUL, performing N extracts using vslidedown becomes
11280 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
11281 // seems to have the same problem (the store is linear in LMUL), but the
11282 // generic expansion *memoizes* the store, and thus for many extracts of
11283 // the same vector we end up with one store and a bunch of loads.
11284 // TODO: We don't have the same code for insert_vector_elt because we
11285 // have BUILD_VECTOR and handle the degenerate case there. Should we
11286 // consider adding an inverse BUILD_VECTOR node?
// NOTE(review): the initializer of LMUL2VT (line 11288) was elided in
// extraction - presumably the LMUL=2 container type used as the size
// threshold; confirm upstream.
11287 MVT LMUL2VT =
11289 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
11290 return SDValue();
11291
11292 // If the index is 0, the vector is already in the right position.
11293 if (!isNullConstant(Idx)) {
11294 // Use a VL of 1 to avoid processing more elements than we need.
11295 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11296 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11297 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11298 }
11299
11300 if (!EltVT.isInteger()) {
11301 // Floating-point extracts are handled in TableGen.
11302 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
11303 }
11304
// Integer result: read element 0 via vmv.x.s and truncate to the element
// type (vmv.x.s sign-extends into XLenVT, exposing sign-bit information).
11305 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11306 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
11307}
11308
11309// Some RVV intrinsics may claim that they want an integer operand to be
11310// promoted or expanded.
// NOTE(review): the first signature line (11311) was elided in extraction;
// per the preceding comment this helper legalizes the scalar operand of RVV
// intrinsics whose scalar type does not match XLenVT: narrow scalars are
// promoted, and i64 scalars on RV32 are split or turned into splats.
11312 const RISCVSubtarget &Subtarget) {
11313 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
11314 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
11315 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
11316 "Unexpected opcode");
11317
11318 if (!Subtarget.hasVInstructions())
11319 return SDValue();
11320
// Chained intrinsics carry the chain as operand 0, shifting the ID by one.
11321 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11322 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11323 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11324
11325 SDLoc DL(Op);
11326
// NOTE(review): the declaration receiving this lookup (line 11327, the
// intrinsic-info pointer `II`) was elided in extraction - confirm upstream.
11328 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11329 if (!II || !II->hasScalarOperand())
11330 return SDValue();
11331
11332 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
11333 assert(SplatOp < Op.getNumOperands());
11334
11335 SmallVector<SDValue, 8> Operands(Op->ops());
11336 SDValue &ScalarOp = Operands[SplatOp];
11337 MVT OpVT = ScalarOp.getSimpleValueType();
11338 MVT XLenVT = Subtarget.getXLenVT();
11339
11340 // If this isn't a scalar, or its type is XLenVT we're done.
11341 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11342 return SDValue();
11343
11344 // Simplest case is that the operand needs to be promoted to XLenVT.
11345 if (OpVT.bitsLT(XLenVT)) {
11346 // If the operand is a constant, sign extend to increase our chances
11347 // of being able to use a .vi instruction. ANY_EXTEND would become a
11348 // zero extend and the simm5 check in isel would fail.
11349 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the extension-opcode selection (line 11351) was elided in
// extraction - per the comment it picks SIGN_EXTEND for constants and
// ANY_EXTEND otherwise; confirm upstream.
11350 unsigned ExtOpc =
11352 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11353 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11354 }
11355
11356 // Use the previous operand to get the vXi64 VT. The result might be a mask
11357 // VT for compares. Using the previous operand assumes that the previous
11358 // operand will never have a smaller element size than a scalar operand and
11359 // that a widening operation never uses SEW=64.
11360 // NOTE: If this fails the below assert, we can probably just find the
11361 // element count from any operand or result and use it to construct the VT.
11362 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
11363 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
11364
11365 // The more complex case is when the scalar is larger than XLenVT.
11366 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
11367 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
11368
11369 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
11370 // instruction to sign-extend since SEW>XLEN.
11371 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
11372 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
11373 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11374 }
11375
11376 switch (IntNo) {
11377 case Intrinsic::riscv_vslide1up:
11378 case Intrinsic::riscv_vslide1down:
11379 case Intrinsic::riscv_vslide1up_mask:
11380 case Intrinsic::riscv_vslide1down_mask: {
11381 // We need to special case these when the scalar is larger than XLen.
// An i64 slide1up/slide1down on RV32 becomes two SEW=32 slides over a
// double-length i32 view of the vector, inserting the lo/hi halves.
11382 unsigned NumOps = Op.getNumOperands();
11383 bool IsMasked = NumOps == 7;
11384
11385 // Convert the vector source to the equivalent nxvXi32 vector.
11386 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
11387 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
11388 SDValue ScalarLo, ScalarHi;
11389 std::tie(ScalarLo, ScalarHi) =
11390 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
11391
11392 // Double the VL since we halved SEW.
11393 SDValue AVL = getVLOperand(Op);
11394 SDValue I32VL;
11395
11396 // Optimize for constant AVL
11397 if (isa<ConstantSDNode>(AVL)) {
// NOTE(review): the initializer on line 11399 (the VLMAX bounds query)
// was elided in extraction - confirm upstream.
11398 const auto [MinVLMAX, MaxVLMAX] =
11400
11401 uint64_t AVLInt = AVL->getAsZExtVal();
11402 if (AVLInt <= MinVLMAX) {
11403 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
11404 } else if (AVLInt >= 2 * MaxVLMAX) {
11405 // Just set vl to VLMAX in this situation
11406 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
11407 } else {
11408 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
11409 // is related to the hardware implementation.
11410 // So let the following code handle
11411 }
11412 }
// Non-constant or hardware-dependent AVL: ask vsetvli for the real VL at
// the original SEW, then double it for the SEW=32 view.
// NOTE(review): the declaration of `Lmul` (line 11414) was elided in
// extraction - confirm upstream.
11413 if (!I32VL) {
11415 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
11416 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
11417 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
11418 SDValue SETVL =
11419 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
11420 // Using vsetvli instruction to get actually used length which related to
11421 // the hardware implementation
11422 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
11423 SEW, LMUL);
11424 I32VL =
11425 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
11426 }
11427
11428 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
11429
11430 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
11431 // instructions.
11432 SDValue Passthru;
11433 if (IsMasked)
11434 Passthru = DAG.getUNDEF(I32VT);
11435 else
11436 Passthru = DAG.getBitcast(I32VT, Operands[1]);
11437
// slide1up inserts hi first so that lo ends up in the lower half of the
// new i64 element; slide1down inserts lo first for the same reason.
11438 if (IntNo == Intrinsic::riscv_vslide1up ||
11439 IntNo == Intrinsic::riscv_vslide1up_mask) {
11440 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11441 ScalarHi, I32Mask, I32VL);
11442 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11443 ScalarLo, I32Mask, I32VL);
11444 } else {
11445 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11446 ScalarLo, I32Mask, I32VL);
11447 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11448 ScalarHi, I32Mask, I32VL);
11449 }
11450
11451 // Convert back to nxvXi64.
11452 Vec = DAG.getBitcast(VT, Vec);
11453
11454 if (!IsMasked)
11455 return Vec;
11456 // Apply mask after the operation.
11457 SDValue Mask = Operands[NumOps - 3];
11458 SDValue MaskedOff = Operands[1];
11459 // Assume Policy operand is the last operand.
11460 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
11461 // We don't need to select maskedoff if it's undef.
11462 if (MaskedOff.isUndef())
11463 return Vec;
11464 // TAMU
11465 if (Policy == RISCVVType::TAIL_AGNOSTIC)
11466 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11467 DAG.getUNDEF(VT), AVL);
11468 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
11469 // It's fine because vmerge does not care mask policy.
11470 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11471 MaskedOff, AVL);
11472 }
11473 }
11474
11475 // We need to convert the scalar to a splat vector.
11476 SDValue VL = getVLOperand(Op);
11477 assert(VL.getValueType() == XLenVT);
11478 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
11479 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11480}
11481
11482// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
11483// scalable vector llvm.get.vector.length for now.
11484//
11485// We need to convert from a scalable VF to a vsetvli with VLMax equal to
11486// (vscale * VF). The vscale and VF are independent of element width. We use
11487// SEW=8 for the vsetvli because it is the only element width that supports all
11488// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
11489// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
11490// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
11491// SEW and LMUL are better for the surrounding vector instructions.
// NOTE(review): the first signature line (11492) was elided in extraction;
// per the preceding comment this is the llvm.experimental.get.vector.length
// lowering (SDNode *N, SelectionDAG &DAG, ...).
11493 const RISCVSubtarget &Subtarget) {
11494 MVT XLenVT = Subtarget.getXLenVT();
11495
11496 // The smallest LMUL is only valid for the smallest element width.
11497 const unsigned ElementWidth = 8;
11498
11499 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
11500 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
11501 // We don't support VF==1 with ELEN==32.
// MinVF is only referenced by the assert below, hence [[maybe_unused]].
11502 [[maybe_unused]] unsigned MinVF =
11503 RISCV::RVVBitsPerBlock / Subtarget.getELen();
11504
11505 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
11506 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
11507 "Unexpected VF");
11508
// Choose LMUL (possibly fractional) so that with SEW=8 the vsetvli's VLMAX
// equals vscale * VF.
11509 bool Fractional = VF < LMul1VF;
11510 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
11511 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
11512 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
11513
11514 SDLoc DL(N);
11515
11516 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
11517 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
11518
// Operand 1 is the requested count (AVL); widen it to XLenVT for vsetvli.
11519 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
11520
11521 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
11522 SDValue Res =
11523 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
11524 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
11525}
11526
// NOTE(review): the first signature line (11527) was elided in extraction;
// this is the llvm.experimental.cttz.elts lowering (SDNode *N, ...).
// vfirst.m yields the index of the first set mask bit, or -1 when none is
// set. Operand 2 is the intrinsic's is-zero-poison flag: when it is 1 the
// all-zeros result is poison, so the raw vfirst result can be returned;
// otherwise the -1 sentinel must be mapped to the element count.
11528 const RISCVSubtarget &Subtarget) {
11529 SDValue Op0 = N->getOperand(1);
11530 MVT OpVT = Op0.getSimpleValueType();
11531 MVT ContainerVT = OpVT;
11532 if (OpVT.isFixedLengthVector()) {
11533 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
11534 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
11535 }
11536 MVT XLenVT = Subtarget.getXLenVT();
11537 SDLoc DL(N);
11538 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
11539 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
11540 if (isOneConstant(N->getOperand(2)))
11541 return Res;
11542
11543 // Convert -1 to VL.
// A negative vfirst result means no bit was set; select the element count
// in that case, otherwise pass the found index through.
11544 SDValue Setcc =
11545 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
11546 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
11547 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
11548}
11549
// Promote the scalar operand of a VCIX intrinsic to XLenVT in place, if the
// intrinsic has one and it is narrower than XLenVT. Operands is mutated.
11550static inline void promoteVCIXScalar(SDValue Op,
11551 MutableArrayRef<SDValue> Operands,
11552 SelectionDAG &DAG) {
// NOTE(review): the initializer of Subtarget (line 11554) was elided in
// extraction - confirm upstream.
11553 const RISCVSubtarget &Subtarget =
11555
11556 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11557 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11558 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11559 SDLoc DL(Op);
11560
// NOTE(review): the declaration receiving this lookup (line 11561, the
// intrinsic-info pointer `II`) was elided in extraction - confirm upstream.
11562 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11563 if (!II || !II->hasScalarOperand())
11564 return;
11565
11566 unsigned SplatOp = II->ScalarOperand + 1;
11567 assert(SplatOp < Op.getNumOperands());
11568
11569 SDValue &ScalarOp = Operands[SplatOp];
11570 MVT OpVT = ScalarOp.getSimpleValueType();
11571 MVT XLenVT = Subtarget.getXLenVT();
11572
11573 // The code below is partially copied from lowerVectorIntrinsicScalars.
11574 // If this isn't a scalar, or its type is XLenVT we're done.
11575 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11576 return;
11577
11578 // Manually emit promote operation for scalar operation.
11579 if (OpVT.bitsLT(XLenVT)) {
// NOTE(review): the extension-opcode selection (line 11581) was elided in
// extraction - confirm upstream.
11580 unsigned ExtOpc =
11582 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11583 }
11584}
11585
// Normalize the operand list of a VCIX intrinsic in place: first legalize
// the scalar operand, then for every vector operand bitcast floating-point
// vectors to same-width integer vectors and convert fixed-length vectors to
// their scalable container types.
11586static void processVCIXOperands(SDValue OrigOp,
11587 MutableArrayRef<SDValue> Operands,
11588 SelectionDAG &DAG) {
11589 promoteVCIXScalar(OrigOp, Operands, DAG);
// NOTE(review): the initializer of Subtarget (line 11591) was elided in
// extraction - confirm upstream.
11590 const RISCVSubtarget &Subtarget =
11592 for (SDValue &V : Operands) {
11593 EVT ValType = V.getValueType();
11594 if (ValType.isVector() && ValType.isFloatingPoint()) {
11595 MVT InterimIVT =
11596 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
11597 ValType.getVectorElementCount());
11598 V = DAG.getBitcast(InterimIVT, V);
11599 }
// Note: ValType was captured before the bitcast above, so this check still
// reflects the operand's original fixed-length property; the conversion
// itself re-queries V's (possibly bitcast) current type.
11600 if (ValType.isFixedLengthVector()) {
11601 MVT OpContainerVT = getContainerForFixedLengthVector(
11602 DAG, V.getSimpleValueType(), Subtarget);
11603 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
11604 }
11605 }
11606}
11607
11608// LMUL * VLEN should be greater than or equal to EGS * SEW
// Checks the element-group-width constraint described above: the vector's
// guaranteed register-group size (LMUL * min VLEN) must cover one element
// group of EGS elements at the vector's SEW.
11609static inline bool isValidEGW(int EGS, EVT VT,
11610 const RISCVSubtarget &Subtarget) {
// NOTE(review): the LMUL factor expression (line 11612, presumably the
// ratio of the type's size to RVVBitsPerBlock) and the `>=` comparison were
// elided in extraction - confirm upstream.
11611 return (Subtarget.getRealMinVLen() *
11613 EGS * VT.getScalarSizeInBits();
11614}
11615
11616SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11617 SelectionDAG &DAG) const {
11618 unsigned IntNo = Op.getConstantOperandVal(0);
11619 SDLoc DL(Op);
11620 MVT XLenVT = Subtarget.getXLenVT();
11621
11622 switch (IntNo) {
11623 default:
11624 break; // Don't custom lower most intrinsics.
11625 case Intrinsic::riscv_tuple_insert: {
11626 SDValue Vec = Op.getOperand(1);
11627 SDValue SubVec = Op.getOperand(2);
11628 SDValue Index = Op.getOperand(3);
11629
11630 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
11631 SubVec, Index);
11632 }
11633 case Intrinsic::riscv_tuple_extract: {
11634 SDValue Vec = Op.getOperand(1);
11635 SDValue Index = Op.getOperand(2);
11636
11637 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
11638 Index);
11639 }
11640 case Intrinsic::thread_pointer: {
11641 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11642 return DAG.getRegister(RISCV::X4, PtrVT);
11643 }
11644 case Intrinsic::riscv_orc_b:
11645 case Intrinsic::riscv_brev8:
11646 case Intrinsic::riscv_sha256sig0:
11647 case Intrinsic::riscv_sha256sig1:
11648 case Intrinsic::riscv_sha256sum0:
11649 case Intrinsic::riscv_sha256sum1:
11650 case Intrinsic::riscv_sm3p0:
11651 case Intrinsic::riscv_sm3p1: {
11652 unsigned Opc;
11653 switch (IntNo) {
11654 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11655 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11656 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11657 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11658 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11659 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11660 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11661 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11662 }
11663
11664 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11665 }
11666 case Intrinsic::riscv_sm4ks:
11667 case Intrinsic::riscv_sm4ed: {
11668 unsigned Opc =
11669 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11670
11671 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
11672 Op.getOperand(3));
11673 }
11674 case Intrinsic::riscv_zip:
11675 case Intrinsic::riscv_unzip: {
11676 unsigned Opc =
11677 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
11678 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11679 }
11680 case Intrinsic::riscv_mopr:
11681 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
11682 Op.getOperand(2));
11683
11684 case Intrinsic::riscv_moprr: {
11685 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
11686 Op.getOperand(2), Op.getOperand(3));
11687 }
11688 case Intrinsic::riscv_clmulh:
11689 case Intrinsic::riscv_clmulr: {
11690 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
11691 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
11692 }
11693 case Intrinsic::experimental_get_vector_length:
11694 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
11695 case Intrinsic::experimental_cttz_elts:
11696 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
11697 case Intrinsic::riscv_vmv_x_s: {
11698 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
11699 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
11700 }
11701 case Intrinsic::riscv_vfmv_f_s:
11702 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
11703 case Intrinsic::riscv_vmv_v_x:
11704 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
11705 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
11706 Subtarget);
11707 case Intrinsic::riscv_vfmv_v_f:
11708 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
11709 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11710 case Intrinsic::riscv_vmv_s_x: {
11711 SDValue Scalar = Op.getOperand(2);
11712
11713 if (Scalar.getValueType().bitsLE(XLenVT)) {
11714 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
11715 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
11716 Op.getOperand(1), Scalar, Op.getOperand(3));
11717 }
11718
11719 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
11720
11721 // This is an i64 value that lives in two scalar registers. We have to
11722 // insert this in a convoluted way. First we build vXi64 splat containing
11723 // the two values that we assemble using some bit math. Next we'll use
11724 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
11725 // to merge element 0 from our splat into the source vector.
11726 // FIXME: This is probably not the best way to do this, but it is
11727 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
11728 // point.
11729 // sw lo, (a0)
11730 // sw hi, 4(a0)
11731 // vlse vX, (a0)
11732 //
11733 // vid.v vVid
11734 // vmseq.vx mMask, vVid, 0
11735 // vmerge.vvm vDest, vSrc, vVal, mMask
11736 MVT VT = Op.getSimpleValueType();
11737 SDValue Vec = Op.getOperand(1);
11738 SDValue VL = getVLOperand(Op);
11739
11740 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
11741 if (Op.getOperand(1).isUndef())
11742 return SplattedVal;
11743 SDValue SplattedIdx =
11744 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11745 DAG.getConstant(0, DL, MVT::i32), VL);
11746
11747 MVT MaskVT = getMaskTypeFor(VT);
11748 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
11749 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11750 SDValue SelectCond =
11751 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11752 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
11753 DAG.getUNDEF(MaskVT), Mask, VL});
11754 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
11755 Vec, DAG.getUNDEF(VT), VL);
11756 }
11757 case Intrinsic::riscv_vfmv_s_f:
11758 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getValueType(),
11759 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11760 // EGS * EEW >= 128 bits
11761 case Intrinsic::riscv_vaesdf_vv:
11762 case Intrinsic::riscv_vaesdf_vs:
11763 case Intrinsic::riscv_vaesdm_vv:
11764 case Intrinsic::riscv_vaesdm_vs:
11765 case Intrinsic::riscv_vaesef_vv:
11766 case Intrinsic::riscv_vaesef_vs:
11767 case Intrinsic::riscv_vaesem_vv:
11768 case Intrinsic::riscv_vaesem_vs:
11769 case Intrinsic::riscv_vaeskf1:
11770 case Intrinsic::riscv_vaeskf2:
11771 case Intrinsic::riscv_vaesz_vs:
11772 case Intrinsic::riscv_vsm4k:
11773 case Intrinsic::riscv_vsm4r_vv:
11774 case Intrinsic::riscv_vsm4r_vs: {
11775 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11776 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11777 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11778 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11779 return Op;
11780 }
11781 // EGS * EEW >= 256 bits
11782 case Intrinsic::riscv_vsm3c:
11783 case Intrinsic::riscv_vsm3me: {
11784 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
11785 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
11786 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
11787 return Op;
11788 }
11789 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
11790 case Intrinsic::riscv_vsha2ch:
11791 case Intrinsic::riscv_vsha2cl:
11792 case Intrinsic::riscv_vsha2ms: {
11793 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11794 !Subtarget.hasStdExtZvknhb())
11795 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11796 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11797 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11798 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11799 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11800 return Op;
11801 }
11802 case Intrinsic::riscv_sf_vc_v_x:
11803 case Intrinsic::riscv_sf_vc_v_i:
11804 case Intrinsic::riscv_sf_vc_v_xv:
11805 case Intrinsic::riscv_sf_vc_v_iv:
11806 case Intrinsic::riscv_sf_vc_v_vv:
11807 case Intrinsic::riscv_sf_vc_v_fv:
11808 case Intrinsic::riscv_sf_vc_v_xvv:
11809 case Intrinsic::riscv_sf_vc_v_ivv:
11810 case Intrinsic::riscv_sf_vc_v_vvv:
11811 case Intrinsic::riscv_sf_vc_v_fvv:
11812 case Intrinsic::riscv_sf_vc_v_xvw:
11813 case Intrinsic::riscv_sf_vc_v_ivw:
11814 case Intrinsic::riscv_sf_vc_v_vvw:
11815 case Intrinsic::riscv_sf_vc_v_fvw: {
11816 MVT VT = Op.getSimpleValueType();
11817
11818 SmallVector<SDValue> Operands{Op->op_values()};
11819 processVCIXOperands(Op, Operands, DAG);
11820
11821 MVT RetVT = VT;
11822 if (VT.isFixedLengthVector())
11824 else if (VT.isFloatingPoint())
11827
11828 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11829
11830 if (VT.isFixedLengthVector())
11831 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11832 else if (VT.isFloatingPoint())
11833 NewNode = DAG.getBitcast(VT, NewNode);
11834
11835 if (Op == NewNode)
11836 break;
11837
11838 return NewNode;
11839 }
11840 }
11841
11842 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11843}
11844
11846 unsigned Type) {
11847 SDLoc DL(Op);
11848 SmallVector<SDValue> Operands{Op->op_values()};
11849 Operands.erase(Operands.begin() + 1);
11850
11851 const RISCVSubtarget &Subtarget =
11853 MVT VT = Op.getSimpleValueType();
11854 MVT RetVT = VT;
11855 MVT FloatVT = VT;
11856
11857 if (VT.isFloatingPoint()) {
11858 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11859 VT.getVectorElementCount());
11860 FloatVT = RetVT;
11861 }
11862 if (VT.isFixedLengthVector())
11864 Subtarget);
11865
11866 processVCIXOperands(Op, Operands, DAG);
11867
11868 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11869 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11870 SDValue Chain = NewNode.getValue(1);
11871
11872 if (VT.isFixedLengthVector())
11873 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11874 if (VT.isFloatingPoint())
11875 NewNode = DAG.getBitcast(VT, NewNode);
11876
11877 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11878
11879 return NewNode;
11880}
11881
11883 unsigned Type) {
11884 SmallVector<SDValue> Operands{Op->op_values()};
11885 Operands.erase(Operands.begin() + 1);
11886 processVCIXOperands(Op, Operands, DAG);
11887
11888 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11889}
11890
11891static SDValue
11893 const RISCVSubtarget &Subtarget,
11894 SelectionDAG &DAG) {
11895 bool IsStrided;
11896 switch (IntNo) {
11897 case Intrinsic::riscv_seg2_load_mask:
11898 case Intrinsic::riscv_seg3_load_mask:
11899 case Intrinsic::riscv_seg4_load_mask:
11900 case Intrinsic::riscv_seg5_load_mask:
11901 case Intrinsic::riscv_seg6_load_mask:
11902 case Intrinsic::riscv_seg7_load_mask:
11903 case Intrinsic::riscv_seg8_load_mask:
11904 IsStrided = false;
11905 break;
11906 case Intrinsic::riscv_sseg2_load_mask:
11907 case Intrinsic::riscv_sseg3_load_mask:
11908 case Intrinsic::riscv_sseg4_load_mask:
11909 case Intrinsic::riscv_sseg5_load_mask:
11910 case Intrinsic::riscv_sseg6_load_mask:
11911 case Intrinsic::riscv_sseg7_load_mask:
11912 case Intrinsic::riscv_sseg8_load_mask:
11913 IsStrided = true;
11914 break;
11915 default:
11916 llvm_unreachable("unexpected intrinsic ID");
11917 };
11918
11919 static const Intrinsic::ID VlsegInts[7] = {
11920 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11921 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11922 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11923 Intrinsic::riscv_vlseg8_mask};
11924 static const Intrinsic::ID VlssegInts[7] = {
11925 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11926 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11927 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11928 Intrinsic::riscv_vlsseg8_mask};
11929
11930 SDLoc DL(Op);
11931 unsigned NF = Op->getNumValues() - 1;
11932 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11933 MVT XLenVT = Subtarget.getXLenVT();
11934 MVT VT = Op->getSimpleValueType(0);
11935 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11936 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11937 ContainerVT.getScalarSizeInBits();
11938 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11939
11940 // Operands: (chain, int_id, pointer, mask, vl) or
11941 // (chain, int_id, pointer, offset, mask, vl)
11942 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11943 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11944 MVT MaskVT = Mask.getSimpleValueType();
11945 MVT MaskContainerVT =
11946 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11947 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11948
11949 SDValue IntID = DAG.getTargetConstant(
11950 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11951 auto *Load = cast<MemIntrinsicSDNode>(Op);
11952
11953 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11955 Load->getChain(),
11956 IntID,
11957 DAG.getUNDEF(VecTupTy),
11958 Op.getOperand(2),
11959 Mask,
11960 VL,
11963 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11964 // Insert the stride operand.
11965 if (IsStrided)
11966 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11967
11968 SDValue Result =
11970 Load->getMemoryVT(), Load->getMemOperand());
11972 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11973 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11974 Result.getValue(0),
11975 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11976 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11977 }
11978 Results.push_back(Result.getValue(1));
11979 return DAG.getMergeValues(Results, DL);
11980}
11981
11982SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11983 SelectionDAG &DAG) const {
11984 unsigned IntNo = Op.getConstantOperandVal(1);
11985 switch (IntNo) {
11986 default:
11987 break;
11988 case Intrinsic::riscv_seg2_load_mask:
11989 case Intrinsic::riscv_seg3_load_mask:
11990 case Intrinsic::riscv_seg4_load_mask:
11991 case Intrinsic::riscv_seg5_load_mask:
11992 case Intrinsic::riscv_seg6_load_mask:
11993 case Intrinsic::riscv_seg7_load_mask:
11994 case Intrinsic::riscv_seg8_load_mask:
11995 case Intrinsic::riscv_sseg2_load_mask:
11996 case Intrinsic::riscv_sseg3_load_mask:
11997 case Intrinsic::riscv_sseg4_load_mask:
11998 case Intrinsic::riscv_sseg5_load_mask:
11999 case Intrinsic::riscv_sseg6_load_mask:
12000 case Intrinsic::riscv_sseg7_load_mask:
12001 case Intrinsic::riscv_sseg8_load_mask:
12002 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
12003
12004 case Intrinsic::riscv_sf_vc_v_x_se:
12005 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
12006 case Intrinsic::riscv_sf_vc_v_i_se:
12007 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
12008 case Intrinsic::riscv_sf_vc_v_xv_se:
12009 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
12010 case Intrinsic::riscv_sf_vc_v_iv_se:
12011 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
12012 case Intrinsic::riscv_sf_vc_v_vv_se:
12013 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
12014 case Intrinsic::riscv_sf_vc_v_fv_se:
12015 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
12016 case Intrinsic::riscv_sf_vc_v_xvv_se:
12017 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
12018 case Intrinsic::riscv_sf_vc_v_ivv_se:
12019 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
12020 case Intrinsic::riscv_sf_vc_v_vvv_se:
12021 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
12022 case Intrinsic::riscv_sf_vc_v_fvv_se:
12023 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
12024 case Intrinsic::riscv_sf_vc_v_xvw_se:
12025 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
12026 case Intrinsic::riscv_sf_vc_v_ivw_se:
12027 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
12028 case Intrinsic::riscv_sf_vc_v_vvw_se:
12029 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
12030 case Intrinsic::riscv_sf_vc_v_fvw_se:
12031 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
12032 }
12033
12034 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
12035}
12036
12037static SDValue
12039 const RISCVSubtarget &Subtarget,
12040 SelectionDAG &DAG) {
12041 bool IsStrided;
12042 switch (IntNo) {
12043 case Intrinsic::riscv_seg2_store_mask:
12044 case Intrinsic::riscv_seg3_store_mask:
12045 case Intrinsic::riscv_seg4_store_mask:
12046 case Intrinsic::riscv_seg5_store_mask:
12047 case Intrinsic::riscv_seg6_store_mask:
12048 case Intrinsic::riscv_seg7_store_mask:
12049 case Intrinsic::riscv_seg8_store_mask:
12050 IsStrided = false;
12051 break;
12052 case Intrinsic::riscv_sseg2_store_mask:
12053 case Intrinsic::riscv_sseg3_store_mask:
12054 case Intrinsic::riscv_sseg4_store_mask:
12055 case Intrinsic::riscv_sseg5_store_mask:
12056 case Intrinsic::riscv_sseg6_store_mask:
12057 case Intrinsic::riscv_sseg7_store_mask:
12058 case Intrinsic::riscv_sseg8_store_mask:
12059 IsStrided = true;
12060 break;
12061 default:
12062 llvm_unreachable("unexpected intrinsic ID");
12063 }
12064
12065 SDLoc DL(Op);
12066 static const Intrinsic::ID VssegInts[] = {
12067 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12068 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12069 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12070 Intrinsic::riscv_vsseg8_mask};
12071 static const Intrinsic::ID VsssegInts[] = {
12072 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
12073 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
12074 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
12075 Intrinsic::riscv_vssseg8_mask};
12076
12077 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
12078 // (chain, int_id, vec*, ptr, stride, mask, vl)
12079 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
12080 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
12081 MVT XLenVT = Subtarget.getXLenVT();
12082 MVT VT = Op->getOperand(2).getSimpleValueType();
12083 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
12084 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
12085 ContainerVT.getScalarSizeInBits();
12086 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
12087
12088 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
12089 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
12090 MVT MaskVT = Mask.getSimpleValueType();
12091 MVT MaskContainerVT =
12092 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
12093 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
12094
12095 SDValue IntID = DAG.getTargetConstant(
12096 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
12097 SDValue Ptr = Op->getOperand(NF + 2);
12098
12099 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
12100
12101 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12102 for (unsigned i = 0; i < NF; i++)
12103 StoredVal = DAG.getNode(
12104 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12105 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
12106 DAG, Subtarget),
12107 DAG.getTargetConstant(i, DL, MVT::i32));
12108
12110 FixedIntrinsic->getChain(),
12111 IntID,
12112 StoredVal,
12113 Ptr,
12114 Mask,
12115 VL,
12116 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
12117 // Insert the stride operand.
12118 if (IsStrided)
12119 Ops.insert(std::next(Ops.begin(), 4),
12120 Op.getOperand(Op.getNumOperands() - 3));
12121
12122 return DAG.getMemIntrinsicNode(
12123 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12124 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
12125}
12126
12127SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
12128 SelectionDAG &DAG) const {
12129 unsigned IntNo = Op.getConstantOperandVal(1);
12130 switch (IntNo) {
12131 default:
12132 break;
12133 case Intrinsic::riscv_seg2_store_mask:
12134 case Intrinsic::riscv_seg3_store_mask:
12135 case Intrinsic::riscv_seg4_store_mask:
12136 case Intrinsic::riscv_seg5_store_mask:
12137 case Intrinsic::riscv_seg6_store_mask:
12138 case Intrinsic::riscv_seg7_store_mask:
12139 case Intrinsic::riscv_seg8_store_mask:
12140 case Intrinsic::riscv_sseg2_store_mask:
12141 case Intrinsic::riscv_sseg3_store_mask:
12142 case Intrinsic::riscv_sseg4_store_mask:
12143 case Intrinsic::riscv_sseg5_store_mask:
12144 case Intrinsic::riscv_sseg6_store_mask:
12145 case Intrinsic::riscv_sseg7_store_mask:
12146 case Intrinsic::riscv_sseg8_store_mask:
12147 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
12148
12149 case Intrinsic::riscv_sf_vc_xv_se:
12150 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
12151 case Intrinsic::riscv_sf_vc_iv_se:
12152 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
12153 case Intrinsic::riscv_sf_vc_vv_se:
12154 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
12155 case Intrinsic::riscv_sf_vc_fv_se:
12156 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
12157 case Intrinsic::riscv_sf_vc_xvv_se:
12158 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
12159 case Intrinsic::riscv_sf_vc_ivv_se:
12160 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
12161 case Intrinsic::riscv_sf_vc_vvv_se:
12162 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
12163 case Intrinsic::riscv_sf_vc_fvv_se:
12164 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
12165 case Intrinsic::riscv_sf_vc_xvw_se:
12166 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
12167 case Intrinsic::riscv_sf_vc_ivw_se:
12168 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
12169 case Intrinsic::riscv_sf_vc_vvw_se:
12170 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
12171 case Intrinsic::riscv_sf_vc_fvw_se:
12172 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
12173 }
12174
12175 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
12176}
12177
12178static unsigned getRVVReductionOp(unsigned ISDOpcode) {
12179 switch (ISDOpcode) {
12180 default:
12181 llvm_unreachable("Unhandled reduction");
12182 case ISD::VP_REDUCE_ADD:
12183 case ISD::VECREDUCE_ADD:
12184 return RISCVISD::VECREDUCE_ADD_VL;
12185 case ISD::VP_REDUCE_UMAX:
12187 return RISCVISD::VECREDUCE_UMAX_VL;
12188 case ISD::VP_REDUCE_SMAX:
12190 return RISCVISD::VECREDUCE_SMAX_VL;
12191 case ISD::VP_REDUCE_UMIN:
12193 return RISCVISD::VECREDUCE_UMIN_VL;
12194 case ISD::VP_REDUCE_SMIN:
12196 return RISCVISD::VECREDUCE_SMIN_VL;
12197 case ISD::VP_REDUCE_AND:
12198 case ISD::VECREDUCE_AND:
12199 return RISCVISD::VECREDUCE_AND_VL;
12200 case ISD::VP_REDUCE_OR:
12201 case ISD::VECREDUCE_OR:
12202 return RISCVISD::VECREDUCE_OR_VL;
12203 case ISD::VP_REDUCE_XOR:
12204 case ISD::VECREDUCE_XOR:
12205 return RISCVISD::VECREDUCE_XOR_VL;
12206 case ISD::VP_REDUCE_FADD:
12207 return RISCVISD::VECREDUCE_FADD_VL;
12208 case ISD::VP_REDUCE_SEQ_FADD:
12209 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
12210 case ISD::VP_REDUCE_FMAX:
12211 case ISD::VP_REDUCE_FMAXIMUM:
12212 return RISCVISD::VECREDUCE_FMAX_VL;
12213 case ISD::VP_REDUCE_FMIN:
12214 case ISD::VP_REDUCE_FMINIMUM:
12215 return RISCVISD::VECREDUCE_FMIN_VL;
12216 }
12217
12218}
12219
// Lower a reduction of a mask vector (vXi1) via vcpop.m, since RVV has no
// dedicated i1 reduction instructions:
//   AND -> vcpop(~x) == 0,  OR -> vcpop(x) != 0,  XOR -> (vcpop(x) & 1) != 0.
// For VP reductions the start value is folded in at the end.
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  // VP reductions carry the start value as operand 0, the vector as 1.
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    // VP reductions supply their own mask and EVL operands.
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  ISD::CondCode CC;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    // Use the VL-predicated mask-xor when VL matters (VP or a fixed-length
    // source); a plain XOR suffices for a full scalable vector.
    if (IsVP || VecVT.isFixedLengthVector())
      Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    else
      Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    break;
  }
  }

  // Compare the (possibly masked) population count against zero and narrow
  // the XLen-wide comparison result down to the i1 result type.
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
12300
12301static bool isNonZeroAVL(SDValue AVL) {
12302 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
12303 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
12304 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
12305 (ImmAVL && ImmAVL->getZExtValue() >= 1);
12306}
12307
12308/// Helper to lower a reduction sequence of the form:
12309/// scalar = reduce_op vec, scalar_start
12310static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
12311 SDValue StartValue, SDValue Vec, SDValue Mask,
12312 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
12313 const RISCVSubtarget &Subtarget) {
12314 const MVT VecVT = Vec.getSimpleValueType();
12315 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
12316 const MVT XLenVT = Subtarget.getXLenVT();
12317 const bool NonZeroAVL = isNonZeroAVL(VL);
12318
12319 // The reduction needs an LMUL1 input; do the splat at either LMUL1
12320 // or the original VT if fractional.
12321 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
12322 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
12323 // prove it is non-zero. For the AVL=0 case, we need the scalar to
12324 // be the result of the reduction operation.
12325 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
12326 SDValue InitialValue =
12327 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
12328 if (M1VT != InnerVT)
12329 InitialValue =
12330 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
12331 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
12333 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
12334 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
12335 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
12336}
12337
12338SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
12339 SelectionDAG &DAG) const {
12340 SDLoc DL(Op);
12341 SDValue Vec = Op.getOperand(0);
12342 EVT VecEVT = Vec.getValueType();
12343
12344 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
12345
12346 // Due to ordering in legalize types we may have a vector type that needs to
12347 // be split. Do that manually so we can get down to a legal type.
12348 while (getTypeAction(*DAG.getContext(), VecEVT) ==
12350 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12351 VecEVT = Lo.getValueType();
12352 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
12353 }
12354
12355 // TODO: The type may need to be widened rather than split. Or widened before
12356 // it can be split.
12357 if (!isTypeLegal(VecEVT))
12358 return SDValue();
12359
12360 MVT VecVT = VecEVT.getSimpleVT();
12361 MVT VecEltVT = VecVT.getVectorElementType();
12362 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
12363
12364 MVT ContainerVT = VecVT;
12365 if (VecVT.isFixedLengthVector()) {
12366 ContainerVT = getContainerForFixedLengthVector(VecVT);
12367 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12368 }
12369
12370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12371
12372 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
12373 switch (BaseOpc) {
12374 case ISD::AND:
12375 case ISD::OR:
12376 case ISD::UMAX:
12377 case ISD::UMIN:
12378 case ISD::SMAX:
12379 case ISD::SMIN:
12380 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
12381 }
12382 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
12383 Mask, VL, DL, DAG, Subtarget);
12384}
12385
12386// Given a reduction op, this function returns the matching reduction opcode,
12387// the vector SDValue and the scalar SDValue required to lower this to a
12388// RISCVISD node.
12389static std::tuple<unsigned, SDValue, SDValue>
12391 const RISCVSubtarget &Subtarget) {
12392 SDLoc DL(Op);
12393 auto Flags = Op->getFlags();
12394 unsigned Opcode = Op.getOpcode();
12395 switch (Opcode) {
12396 default:
12397 llvm_unreachable("Unhandled reduction");
12398 case ISD::VECREDUCE_FADD: {
12399 // Use positive zero if we can. It is cheaper to materialize.
12400 SDValue Zero =
12401 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
12402 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
12403 }
12405 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
12406 Op.getOperand(0));
12410 case ISD::VECREDUCE_FMAX: {
12411 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
12412 unsigned RVVOpc =
12413 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
12414 ? RISCVISD::VECREDUCE_FMIN_VL
12415 : RISCVISD::VECREDUCE_FMAX_VL;
12416 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
12417 }
12418 }
12419}
12420
12421SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
12422 SelectionDAG &DAG) const {
12423 SDLoc DL(Op);
12424 MVT VecEltVT = Op.getSimpleValueType();
12425
12426 unsigned RVVOpcode;
12427 SDValue VectorVal, ScalarVal;
12428 std::tie(RVVOpcode, VectorVal, ScalarVal) =
12429 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
12430 MVT VecVT = VectorVal.getSimpleValueType();
12431
12432 MVT ContainerVT = VecVT;
12433 if (VecVT.isFixedLengthVector()) {
12434 ContainerVT = getContainerForFixedLengthVector(VecVT);
12435 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
12436 }
12437
12438 MVT ResVT = Op.getSimpleValueType();
12439 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12440 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
12441 VL, DL, DAG, Subtarget);
12442 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
12443 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
12444 return Res;
12445
12446 if (Op->getFlags().hasNoNaNs())
12447 return Res;
12448
12449 // Force output to NaN if any element is Nan.
12450 SDValue IsNan =
12451 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
12452 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
12453 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
12454 MVT XLenVT = Subtarget.getXLenVT();
12455 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
12456 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
12457 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12458 return DAG.getSelect(
12459 DL, ResVT, NoNaNs, Res,
12460 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12461}
12462
// Lower VP_REDUCE_* nodes via lowerReductionSeq; for VP_REDUCE_FMINIMUM /
// VP_REDUCE_FMAXIMUM additionally force the result to NaN when any active
// element or the start value is NaN.
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Start = Op.getOperand(0);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Opc);

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // VP reductions: operand 0 = start, 1 = vector, 2 = mask, 3 = EVL.
  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  SDValue Res =
      lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                        Vec, Mask, VL, DL, DAG, Subtarget);
  if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
      Op->getFlags().hasNoNaNs())
    return Res;

  // Propagate NaNs.
  MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
  // Check if any of the elements in Vec is NaN. (A value compares unequal to
  // itself exactly when it is NaN.)
  SDValue IsNaN = DAG.getNode(
      RISCVISD::SETCC_VL, DL, PredVT,
      {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
  SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
  // Check if the start value is NaN.
  SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
  VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
  SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
                                DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
  MVT ResVT = Res.getSimpleValueType();
  // Select the computed reduction when no NaNs were seen, otherwise a
  // canonical quiet NaN of the result type.
  return DAG.getSelect(
      DL, ResVT, NoNaNs, Res,
      DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
}
12511
12512SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
12513 SelectionDAG &DAG) const {
12514 SDValue Vec = Op.getOperand(0);
12515 SDValue SubVec = Op.getOperand(1);
12516 MVT VecVT = Vec.getSimpleValueType();
12517 MVT SubVecVT = SubVec.getSimpleValueType();
12518
12519 SDLoc DL(Op);
12520 MVT XLenVT = Subtarget.getXLenVT();
12521 unsigned OrigIdx = Op.getConstantOperandVal(2);
12522 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12523
12524 if (OrigIdx == 0 && Vec.isUndef())
12525 return Op;
12526
12527 // We don't have the ability to slide mask vectors up indexed by their i1
12528 // elements; the smallest we can do is i8. Often we are able to bitcast to
12529 // equivalent i8 vectors. Note that when inserting a fixed-length vector
12530 // into a scalable one, we might not necessarily have enough scalable
12531 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
12532 if (SubVecVT.getVectorElementType() == MVT::i1) {
12533 if (VecVT.getVectorMinNumElements() >= 8 &&
12534 SubVecVT.getVectorMinNumElements() >= 8) {
12535 assert(OrigIdx % 8 == 0 && "Invalid index");
12536 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12537 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12538 "Unexpected mask vector lowering");
12539 OrigIdx /= 8;
12540 SubVecVT =
12541 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12542 SubVecVT.isScalableVector());
12543 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12544 VecVT.isScalableVector());
12545 Vec = DAG.getBitcast(VecVT, Vec);
12546 SubVec = DAG.getBitcast(SubVecVT, SubVec);
12547 } else {
12548 // We can't slide this mask vector up indexed by its i1 elements.
12549 // This poses a problem when we wish to insert a scalable vector which
12550 // can't be re-expressed as a larger type. Just choose the slow path and
12551 // extend to a larger type, then truncate back down.
12552 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12553 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12554 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12555 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
12556 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
12557 Op.getOperand(2));
12558 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
12559 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
12560 }
12561 }
12562
12563 // If the subvector vector is a fixed-length type and we don't know VLEN
12564 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12565 // don't know which register of a LMUL group contains the specific subvector
12566 // as we only know the minimum register size. Therefore we must slide the
12567 // vector group up the full amount.
12568 const auto VLen = Subtarget.getRealVLen();
12569 if (SubVecVT.isFixedLengthVector() && !VLen) {
12570 MVT ContainerVT = VecVT;
12571 if (VecVT.isFixedLengthVector()) {
12572 ContainerVT = getContainerForFixedLengthVector(VecVT);
12573 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12574 }
12575
12576 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
12577
12578 SDValue Mask =
12579 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12580 // Set the vector length to only the number of elements we care about. Note
12581 // that for slideup this includes the offset.
12582 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
12583 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
12584
12585 // Use tail agnostic policy if we're inserting over Vec's tail.
12587 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
12589
12590 // If we're inserting into the lowest elements, use a tail undisturbed
12591 // vmv.v.v.
12592 if (OrigIdx == 0) {
12593 SubVec =
12594 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
12595 } else {
12596 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12597 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
12598 SlideupAmt, Mask, VL, Policy);
12599 }
12600
12601 if (VecVT.isFixedLengthVector())
12602 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12603 return DAG.getBitcast(Op.getValueType(), SubVec);
12604 }
12605
12606 MVT ContainerVecVT = VecVT;
12607 if (VecVT.isFixedLengthVector()) {
12608 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
12609 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
12610 }
12611
12612 MVT ContainerSubVecVT = SubVecVT;
12613 if (SubVecVT.isFixedLengthVector()) {
12614 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12615 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
12616 }
12617
12618 unsigned SubRegIdx;
12619 ElementCount RemIdx;
12620 // insert_subvector scales the index by vscale if the subvector is scalable,
12621 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12622 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12623 if (SubVecVT.isFixedLengthVector()) {
12624 assert(VLen);
12625 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12626 auto Decompose =
12628 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12629 SubRegIdx = Decompose.first;
12630 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12631 (OrigIdx % Vscale));
12632 } else {
12633 auto Decompose =
12635 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
12636 SubRegIdx = Decompose.first;
12637 RemIdx = ElementCount::getScalable(Decompose.second);
12638 }
12639
12640 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
12642 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
12643 bool ExactlyVecRegSized =
12644 Subtarget.expandVScale(SubVecVT.getSizeInBits())
12645 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
12646
12647 // 1. If the Idx has been completely eliminated and this subvector's size is
12648 // a vector register or a multiple thereof, or the surrounding elements are
12649 // undef, then this is a subvector insert which naturally aligns to a vector
12650 // register. These can easily be handled using subregister manipulation.
12651 // 2. If the subvector isn't an exact multiple of a valid register group size,
12652 // then the insertion must preserve the undisturbed elements of the register.
12653 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
12654 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
12655 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
12656 // of that LMUL=1 type back into the larger vector (resolving to another
12657 // subregister operation). See below for how our VSLIDEUP works. We go via a
12658 // LMUL=1 type to avoid allocating a large register group to hold our
12659 // subvector.
12660 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
12661 if (SubVecVT.isFixedLengthVector()) {
12662 // We may get NoSubRegister if inserting at index 0 and the subvec
12663 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
12664 if (SubRegIdx == RISCV::NoSubRegister) {
12665 assert(OrigIdx == 0);
12666 return Op;
12667 }
12668
12669 // Use a insert_subvector that will resolve to an insert subreg.
12670 assert(VLen);
12671 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12672 SDValue Insert =
12673 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
12674 if (VecVT.isFixedLengthVector())
12675 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
12676 return Insert;
12677 }
12678 return Op;
12679 }
12680
12681 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
12682 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
12683 // (in our case undisturbed). This means we can set up a subvector insertion
12684 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
12685 // size of the subvector.
12686 MVT InterSubVT = ContainerVecVT;
12687 SDValue AlignedExtract = Vec;
12688 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
12689 if (SubVecVT.isFixedLengthVector()) {
12690 assert(VLen);
12691 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
12692 }
12693 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
12694 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
12695 // Extract a subvector equal to the nearest full vector register type. This
12696 // should resolve to a EXTRACT_SUBREG instruction.
12697 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
12698 }
12699
12700 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
12701
12702 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
12703
12704 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
12705 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
12706
12707 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
12709 if (Subtarget.expandVScale(EndIndex) ==
12710 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
12712
12713 // If we're inserting into the lowest elements, use a tail undisturbed
12714 // vmv.v.v.
12715 if (RemIdx.isZero()) {
12716 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
12717 SubVec, VL);
12718 } else {
12719 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12720
12721 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
12722 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
12723
12724 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
12725 SlideupAmt, Mask, VL, Policy);
12726 }
12727
12728 // If required, insert this subvector back into the correct vector register.
12729 // This should resolve to an INSERT_SUBREG instruction.
12730 if (ContainerVecVT.bitsGT(InterSubVT))
12731 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
12732
12733 if (VecVT.isFixedLengthVector())
12734 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12735
12736 // We might have bitcast from a mask type: cast back to the original type if
12737 // required.
12738 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
12739}
12740
12741SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
12742 SelectionDAG &DAG) const {
12743 SDValue Vec = Op.getOperand(0);
12744 MVT SubVecVT = Op.getSimpleValueType();
12745 MVT VecVT = Vec.getSimpleValueType();
12746
12747 SDLoc DL(Op);
12748 MVT XLenVT = Subtarget.getXLenVT();
12749 unsigned OrigIdx = Op.getConstantOperandVal(1);
12750 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12751
12752 // With an index of 0 this is a cast-like subvector, which can be performed
12753 // with subregister operations.
12754 if (OrigIdx == 0)
12755 return Op;
12756
12757 // We don't have the ability to slide mask vectors down indexed by their i1
12758 // elements; the smallest we can do is i8. Often we are able to bitcast to
12759 // equivalent i8 vectors. Note that when extracting a fixed-length vector
12760 // from a scalable one, we might not necessarily have enough scalable
12761 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
12762 if (SubVecVT.getVectorElementType() == MVT::i1) {
12763 if (VecVT.getVectorMinNumElements() >= 8 &&
12764 SubVecVT.getVectorMinNumElements() >= 8) {
12765 assert(OrigIdx % 8 == 0 && "Invalid index");
12766 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12767 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12768 "Unexpected mask vector lowering");
12769 OrigIdx /= 8;
12770 SubVecVT =
12771 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12772 SubVecVT.isScalableVector());
12773 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12774 VecVT.isScalableVector());
12775 Vec = DAG.getBitcast(VecVT, Vec);
12776 } else {
12777 // We can't slide this mask vector down, indexed by its i1 elements.
12778 // This poses a problem when we wish to extract a scalable vector which
12779 // can't be re-expressed as a larger type. Just choose the slow path and
12780 // extend to a larger type, then truncate back down.
12781 // TODO: We could probably improve this when extracting certain fixed
12782 // from fixed, where we can extract as i8 and shift the correct element
12783 // right to reach the desired subvector?
12784 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12785 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12786 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12787 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
12788 Op.getOperand(1));
12789 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
12790 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
12791 }
12792 }
12793
12794 const auto VLen = Subtarget.getRealVLen();
12795
12796 // If the subvector vector is a fixed-length type and we don't know VLEN
12797 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12798 // don't know which register of a LMUL group contains the specific subvector
12799 // as we only know the minimum register size. Therefore we must slide the
12800 // vector group down the full amount.
12801 if (SubVecVT.isFixedLengthVector() && !VLen) {
12802 MVT ContainerVT = VecVT;
12803 if (VecVT.isFixedLengthVector()) {
12804 ContainerVT = getContainerForFixedLengthVector(VecVT);
12805 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12806 }
12807
12808 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12809 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12810 if (auto ShrunkVT =
12811 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12812 ContainerVT = *ShrunkVT;
12813 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12814 }
12815
12816 SDValue Mask =
12817 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12818 // Set the vector length to only the number of elements we care about. This
12819 // avoids sliding down elements we're going to discard straight away.
12820 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12821 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12822 SDValue Slidedown =
12823 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12824 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12825 // Now we can use a cast-like subvector extract to get the result.
12826 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12827 return DAG.getBitcast(Op.getValueType(), Slidedown);
12828 }
12829
12830 if (VecVT.isFixedLengthVector()) {
12831 VecVT = getContainerForFixedLengthVector(VecVT);
12832 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12833 }
12834
12835 MVT ContainerSubVecVT = SubVecVT;
12836 if (SubVecVT.isFixedLengthVector())
12837 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12838
12839 unsigned SubRegIdx;
12840 ElementCount RemIdx;
12841 // extract_subvector scales the index by vscale if the subvector is scalable,
12842 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12843 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12844 if (SubVecVT.isFixedLengthVector()) {
12845 assert(VLen);
12846 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12847 auto Decompose =
12849 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12850 SubRegIdx = Decompose.first;
12851 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12852 (OrigIdx % Vscale));
12853 } else {
12854 auto Decompose =
12856 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12857 SubRegIdx = Decompose.first;
12858 RemIdx = ElementCount::getScalable(Decompose.second);
12859 }
12860
12861 // If the Idx has been completely eliminated then this is a subvector extract
12862 // which naturally aligns to a vector register. These can easily be handled
12863 // using subregister manipulation. We use an extract_subvector that will
12864 // resolve to an extract subreg.
12865 if (RemIdx.isZero()) {
12866 if (SubVecVT.isFixedLengthVector()) {
12867 assert(VLen);
12868 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12869 Vec =
12870 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12871 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12872 }
12873 return Op;
12874 }
12875
12876 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12877 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12878 // divide exactly.
12879 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12880 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12881
12882 // If the vector type is an LMUL-group type, extract a subvector equal to the
12883 // nearest full vector register type.
12884 MVT InterSubVT = VecVT;
12885 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12886 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12887 // we should have successfully decomposed the extract into a subregister.
12888 // We use an extract_subvector that will resolve to a subreg extract.
12889 assert(SubRegIdx != RISCV::NoSubRegister);
12890 (void)SubRegIdx;
12891 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12892 if (SubVecVT.isFixedLengthVector()) {
12893 assert(VLen);
12894 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12895 }
12896 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12897 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12898 }
12899
12900 // Slide this vector register down by the desired number of elements in order
12901 // to place the desired subvector starting at element 0.
12902 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12903 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12904 if (SubVecVT.isFixedLengthVector())
12905 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12906 SDValue Slidedown =
12907 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12908 Vec, SlidedownAmt, Mask, VL);
12909
12910 // Now the vector is in the right position, extract our final subvector. This
12911 // should resolve to a COPY.
12912 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12913
12914 // We might have bitcast from a mask type: cast back to the original type if
12915 // required.
12916 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12917}
12918
12919// Widen a vector's operands to i8, then truncate its results back to the
12920// original type, typically i1. All operand and result types must be the same.
12922 SelectionDAG &DAG) {
12923 MVT VT = N.getSimpleValueType();
12924 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12926 for (SDValue Op : N->ops()) {
12927 assert(Op.getSimpleValueType() == VT &&
12928 "Operands and result must be same type");
12929 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12930 }
12931
12932 unsigned NumVals = N->getNumValues();
12933
12935 NumVals,
12936 N.getValueType().changeVectorElementType(*DAG.getContext(), MVT::i8)));
12937 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12938 SmallVector<SDValue, 4> TruncVals;
12939 for (unsigned I = 0; I < NumVals; I++) {
12940 TruncVals.push_back(
12941 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12942 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12943 }
12944
12945 if (TruncVals.size() > 1)
12946 return DAG.getMergeValues(TruncVals, DL);
12947 return TruncVals.front();
12948}
12949
12950SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12951 SelectionDAG &DAG) const {
12952 SDLoc DL(Op);
12953 MVT VecVT = Op.getSimpleValueType();
12954
12955 const unsigned Factor = Op->getNumValues();
12956 assert(Factor <= 8);
12957
12958 // 1 bit element vectors need to be widened to e8
12959 if (VecVT.getVectorElementType() == MVT::i1)
12960 return widenVectorOpsToi8(Op, DL, DAG);
12961
12962 // Convert to scalable vectors first.
12963 if (VecVT.isFixedLengthVector()) {
12964 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12966 for (unsigned i = 0U; i < Factor; ++i)
12967 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12968 Subtarget);
12969
12970 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12971 SDValue NewDeinterleave =
12973
12974 SmallVector<SDValue, 8> Res(Factor);
12975 for (unsigned i = 0U; i < Factor; ++i)
12976 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12977 DAG, Subtarget);
12978 return DAG.getMergeValues(Res, DL);
12979 }
12980
12981 // If concatenating would exceed LMUL=8, we need to split.
12982 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12983 (8 * RISCV::RVVBitsPerBlock)) {
12984 SmallVector<SDValue, 8> Ops(Factor * 2);
12985 for (unsigned i = 0; i != Factor; ++i) {
12986 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12987 Ops[i * 2] = OpLo;
12988 Ops[i * 2 + 1] = OpHi;
12989 }
12990
12991 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12992
12994 ArrayRef(Ops).slice(0, Factor));
12996 ArrayRef(Ops).slice(Factor, Factor));
12997
12998 SmallVector<SDValue, 8> Res(Factor);
12999 for (unsigned i = 0; i != Factor; ++i)
13000 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
13001 Hi.getValue(i));
13002
13003 return DAG.getMergeValues(Res, DL);
13004 }
13005
13006 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
13007 MVT VT = Op->getSimpleValueType(0);
13008 SDValue V1 = Op->getOperand(0);
13009 SDValue V2 = Op->getOperand(1);
13010
13011 // For fractional LMUL, check if we can use a higher LMUL
13012 // instruction to avoid a vslidedown.
13013 if (SDValue Src = foldConcatVector(V1, V2);
13014 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
13015 EVT NewVT = VT.getDoubleNumVectorElementsVT();
13016 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
13017 // Freeze the source so we can increase its use count.
13018 Src = DAG.getFreeze(Src);
13019 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
13020 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
13021 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
13022 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
13023 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
13024 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
13025 return DAG.getMergeValues({Even, Odd}, DL);
13026 }
13027
13028 // Freeze the sources so we can increase their use count.
13029 V1 = DAG.getFreeze(V1);
13030 V2 = DAG.getFreeze(V2);
13031 SDValue Even =
13032 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
13033 SDValue Odd =
13034 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
13035 return DAG.getMergeValues({Even, Odd}, DL);
13036 }
13037
13038 SmallVector<SDValue, 8> Ops(Op->op_values());
13039
13040 // Concatenate the vectors as one vector to deinterleave
13041 MVT ConcatVT =
13044 PowerOf2Ceil(Factor)));
13045 if (Ops.size() < PowerOf2Ceil(Factor))
13046 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
13047 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
13048
13049 if (Factor == 2) {
13050 // We can deinterleave through vnsrl.wi if the element type is smaller than
13051 // ELEN
13052 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
13053 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
13054 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
13055 return DAG.getMergeValues({Even, Odd}, DL);
13056 }
13057
13058 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
13059 // possibly mask vector, then extract the required subvector. Doing this
13060 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
13061 // creation to be rematerialized during register allocation to reduce
13062 // register pressure if needed.
13063
13064 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
13065
13066 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
13067 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
13068 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
13069
13070 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
13071 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
13072 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
13073
13074 // vcompress the even and odd elements into two separate vectors
13075 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
13076 EvenMask, DAG.getUNDEF(ConcatVT));
13077 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
13078 OddMask, DAG.getUNDEF(ConcatVT));
13079
13080 // Extract the result half of the gather for even and odd
13081 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
13082 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
13083
13084 return DAG.getMergeValues({Even, Odd}, DL);
13085 }
13086
13087 // Store with unit-stride store and load it back with segmented load.
13088 MVT XLenVT = Subtarget.getXLenVT();
13089 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
13090 SDValue Passthru = DAG.getUNDEF(ConcatVT);
13091
13092 // Allocate a stack slot.
13093 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13095 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
13096 auto &MF = DAG.getMachineFunction();
13097 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13098 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
13099
13100 SDValue StoreOps[] = {DAG.getEntryNode(),
13101 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
13102 Concat, StackPtr, VL};
13103
13104 SDValue Chain = DAG.getMemIntrinsicNode(
13105 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
13106 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
13108
13109 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
13110 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
13111 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
13112 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
13113 Intrinsic::riscv_vlseg8_mask};
13114
13115 SDValue LoadOps[] = {
13116 Chain,
13117 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
13118 Passthru,
13119 StackPtr,
13120 Mask,
13121 VL,
13124 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
13125
13126 unsigned Sz =
13127 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
13128 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
13129
13131 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
13132 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
13134
13135 SmallVector<SDValue, 8> Res(Factor);
13136
13137 for (unsigned i = 0U; i < Factor; ++i)
13138 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
13139 DAG.getTargetConstant(i, DL, MVT::i32));
13140
13141 return DAG.getMergeValues(Res, DL);
13142}
13143
13144SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
13145 SelectionDAG &DAG) const {
13146 SDLoc DL(Op);
13147 MVT VecVT = Op.getSimpleValueType();
13148
13149 const unsigned Factor = Op.getNumOperands();
13150 assert(Factor <= 8);
13151
13152 // i1 vectors need to be widened to i8
13153 if (VecVT.getVectorElementType() == MVT::i1)
13154 return widenVectorOpsToi8(Op, DL, DAG);
13155
13156 // Convert to scalable vectors first.
13157 if (VecVT.isFixedLengthVector()) {
13158 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
13160 for (unsigned i = 0U; i < Factor; ++i)
13161 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
13162 Subtarget);
13163
13164 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
13165 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
13166
13167 SmallVector<SDValue, 8> Res(Factor);
13168 for (unsigned i = 0U; i < Factor; ++i)
13169 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
13170 Subtarget);
13171 return DAG.getMergeValues(Res, DL);
13172 }
13173
13174 MVT XLenVT = Subtarget.getXLenVT();
13175 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
13176
13177 // If the VT is larger than LMUL=8, we need to split and reassemble.
13178 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
13179 (8 * RISCV::RVVBitsPerBlock)) {
13180 SmallVector<SDValue, 8> Ops(Factor * 2);
13181 for (unsigned i = 0; i != Factor; ++i) {
13182 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
13183 Ops[i] = OpLo;
13184 Ops[i + Factor] = OpHi;
13185 }
13186
13187 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
13188
13189 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
13190 ArrayRef(Ops).take_front(Factor)),
13192 ArrayRef(Ops).drop_front(Factor))};
13193
13194 SmallVector<SDValue, 8> Concats(Factor);
13195 for (unsigned i = 0; i != Factor; ++i) {
13196 unsigned IdxLo = 2 * i;
13197 unsigned IdxHi = 2 * i + 1;
13198 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
13199 Res[IdxLo / Factor].getValue(IdxLo % Factor),
13200 Res[IdxHi / Factor].getValue(IdxHi % Factor));
13201 }
13202
13203 return DAG.getMergeValues(Concats, DL);
13204 }
13205
13206 SDValue Interleaved;
13207
13208 // Spill to the stack using a segment store for simplicity.
13209 if (Factor != 2) {
13210 EVT MemVT =
13212 VecVT.getVectorElementCount() * Factor);
13213
13214 // Allocate a stack slot.
13215 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13217 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
13218 EVT PtrVT = StackPtr.getValueType();
13219 auto &MF = DAG.getMachineFunction();
13220 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13221 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
13222
13223 static const Intrinsic::ID IntrIds[] = {
13224 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
13225 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
13226 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
13227 Intrinsic::riscv_vsseg8_mask,
13228 };
13229
13230 unsigned Sz =
13231 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
13232 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
13233
13234 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
13235 for (unsigned i = 0; i < Factor; i++)
13236 StoredVal =
13237 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
13238 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
13239
13240 SDValue Ops[] = {DAG.getEntryNode(),
13241 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
13242 StoredVal,
13243 StackPtr,
13244 Mask,
13245 VL,
13247 DL, XLenVT)};
13248
13249 SDValue Chain = DAG.getMemIntrinsicNode(
13250 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
13251 VecVT.getVectorElementType(), PtrInfo, Alignment,
13253
13254 SmallVector<SDValue, 8> Loads(Factor);
13255
13256 SDValue Increment = DAG.getTypeSize(DL, PtrVT, VecVT.getStoreSize());
13257 for (unsigned i = 0; i != Factor; ++i) {
13258 if (i != 0)
13259 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
13260
13261 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
13262 }
13263
13264 return DAG.getMergeValues(Loads, DL);
13265 }
13266
13267 // Use ri.vzip2{a,b} if available
13268 // TODO: Figure out the best lowering for the spread variants
13269 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
13270 !Op.getOperand(1).isUndef()) {
13271 // Freeze the sources so we can increase their use count.
13272 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
13273 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
13274 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
13275 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
13276 return DAG.getMergeValues({Lo, Hi}, DL);
13277 }
13278
13279 // If the element type is smaller than ELEN, then we can interleave with
13280 // vwaddu.vv and vwmaccu.vx
13281 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
13282 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
13283 DAG, Subtarget);
13284 } else {
13285 // Otherwise, fallback to using vrgathere16.vv
13286 MVT ConcatVT =
13289 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
13290 Op.getOperand(0), Op.getOperand(1));
13291
13292 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
13293
13294 // 0 1 2 3 4 5 6 7 ...
13295 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
13296
13297 // 1 1 1 1 1 1 1 1 ...
13298 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
13299
13300 // 1 0 1 0 1 0 1 0 ...
13301 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
13302 OddMask = DAG.getSetCC(
13303 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
13304 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
13306
13307 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
13308
13309 // Build up the index vector for interleaving the concatenated vector
13310 // 0 0 1 1 2 2 3 3 ...
13311 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
13312 // 0 n 1 n+1 2 n+2 3 n+3 ...
13313 Idx =
13314 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
13315
13316 // Then perform the interleave
13317 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
13318 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
13319 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
13320 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
13321 }
13322
13323 // Extract the two halves from the interleaved result
13324 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
13325 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
13326 VecVT.getVectorMinNumElements());
13327
13328 return DAG.getMergeValues({Lo, Hi}, DL);
13329}
13330
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
13333SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
13334 SelectionDAG &DAG) const {
13335 SDLoc DL(Op);
13336 MVT VT = Op.getSimpleValueType();
13337 assert(VT.isScalableVector() && "Expected scalable vector");
13338 MVT XLenVT = Subtarget.getXLenVT();
13339 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13340 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
13341 uint64_t StepValImm = Op.getConstantOperandVal(0);
13342 if (StepValImm != 1) {
13343 if (isPowerOf2_64(StepValImm)) {
13344 SDValue StepVal =
13345 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13346 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
13347 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
13348 } else {
13349 SDValue StepVal = lowerScalarSplat(
13350 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
13351 VL, VT, DL, DAG, Subtarget);
13352 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
13353 }
13354 }
13355 return StepVec;
13356}
13357
13358// Implement vector_reverse using vrgather.vv with indices determined by
13359// subtracting the id of each element from (VLMAX-1). This will convert
13360// the indices like so:
13361// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
13362// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  // i1 masks have no gather instruction: widen to i8, reverse there, then
  // compare back down to a mask.
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getSetCC(DL, VecVT, Op2,
                        DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
  }

  // Fixed-length vectors are processed inside their scalable container type.
  MVT ContainerVT = VecVT;
  SDValue Vec = Op.getOperand(0);
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // On some uarchs vrgather.vv will read from every input register for each
  // output register, regardless of the indices. However to reverse a vector
  // each output register only needs to read from one register. So decompose it
  // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
  // O(LMUL^2).
  //
  // vsetvli a1, zero, e64, m4, ta, ma
  // vrgatherei16.vv v12, v8, v16
  // ->
  // vsetvli a1, zero, e64, m1, ta, ma
  // vrgather.vv v15, v8, v16
  // vrgather.vv v14, v9, v16
  // vrgather.vv v13, v10, v16
  // vrgather.vv v12, v11, v16
  if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
      ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
    // Recursively reverse each half, then swap the halves.
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getValueType(), Lo);
    Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getValueType(), Hi);
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);

    // Fixed length vectors might not fit exactly into their container, and so
    // leave a gap in the front of the vector after being reversed. Slide this
    // away.
    //
    // x x x x 3 2 1 0 <- v4i16 @ vlen=128
    // 0 1 2 3 x x x x <- reverse
    // x x x x 0 1 2 3 <- vslidedown.vx
    if (VecVT.isFixedLengthVector()) {
      // Slide amount = container element count - fixed element count.
      SDValue Offset = DAG.getNode(
          ISD::SUB, DL, XLenVT,
          DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
          DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
      Concat =
          getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
      Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
    }
    return Concat;
  }

  unsigned EltSize = ContainerVT.getScalarSizeInBits();
  unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      VecVT.isFixedLengthVector()
          ? VecVT.getVectorNumElements()
          : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  // At LMUL > 1, do the index computation in 16 bits to reduce register
  // pressure.
  if (IntVT.getScalarType().bitsGT(MVT::i16) &&
      IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
    assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IntVT = IntVT.changeVectorElementType(MVT::i16);
  }

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(
      ISD::SUB, DL, XLenVT,
      DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
      DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  // Gather indices: (VLMAX-1) - vid, i.e. VLMAX-1, VLMAX-2, ..., 1, 0.
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
                               DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VecVT.isFixedLengthVector())
    Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
  return Gather;
}
13493
13494SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
13495 SelectionDAG &DAG) const {
13496 SDLoc DL(Op);
13497 SDValue V1 = Op.getOperand(0);
13498 SDValue V2 = Op.getOperand(1);
13499 SDValue Offset = Op.getOperand(2);
13500 MVT XLenVT = Subtarget.getXLenVT();
13501 MVT VecVT = Op.getSimpleValueType();
13502
13503 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
13504
13505 SDValue DownOffset, UpOffset;
13506 if (Op.getOpcode() == ISD::VECTOR_SPLICE_LEFT) {
13507 // The operand is a TargetConstant, we need to rebuild it as a regular
13508 // constant.
13509 DownOffset = Offset;
13510 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13511 } else {
13512 // The operand is a TargetConstant, we need to rebuild it as a regular
13513 // constant rather than negating the original operand.
13514 UpOffset = Offset;
13515 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13516 }
13517
13518 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
13519
13520 SDValue SlideDown = getVSlidedown(
13521 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
13522 Subtarget.hasVLDependentLatency() ? UpOffset
13523 : DAG.getRegister(RISCV::X0, XLenVT));
13524 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
13525 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
13527}
13528
13529SDValue
13530RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
13531 SelectionDAG &DAG) const {
13532 SDLoc DL(Op);
13533 auto *Load = cast<LoadSDNode>(Op);
13534
13536 Load->getMemoryVT(),
13537 *Load->getMemOperand()) &&
13538 "Expecting a correctly-aligned load");
13539
13540 MVT VT = Op.getSimpleValueType();
13541 MVT XLenVT = Subtarget.getXLenVT();
13542 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13543
13544 // If we know the exact VLEN and our fixed length vector completely fills
13545 // the container, use a whole register load instead.
13546 const auto [MinVLMAX, MaxVLMAX] =
13547 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13548 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13549 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13550 MachineMemOperand *MMO = Load->getMemOperand();
13551 SDValue NewLoad =
13552 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
13553 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
13554 MMO->getAAInfo(), MMO->getRanges());
13555 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13556 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13557 }
13558
13559 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13560
13561 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13562 SDValue IntID = DAG.getTargetConstant(
13563 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
13564 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
13565 if (!IsMaskOp)
13566 Ops.push_back(DAG.getUNDEF(ContainerVT));
13567 Ops.push_back(Load->getBasePtr());
13568 Ops.push_back(VL);
13569 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13570 SDValue NewLoad =
13572 Load->getMemoryVT(), Load->getMemOperand());
13573
13574 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13575 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13576}
13577
13578SDValue
13579RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
13580 SelectionDAG &DAG) const {
13581 SDLoc DL(Op);
13582 auto *Store = cast<StoreSDNode>(Op);
13583
13585 Store->getMemoryVT(),
13586 *Store->getMemOperand()) &&
13587 "Expecting a correctly-aligned store");
13588
13589 SDValue StoreVal = Store->getValue();
13590 MVT VT = StoreVal.getSimpleValueType();
13591 MVT XLenVT = Subtarget.getXLenVT();
13592
13593 // If the size less than a byte, we need to pad with zeros to make a byte.
13594 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
13595 VT = MVT::v8i1;
13596 StoreVal =
13597 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
13598 }
13599
13600 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13601
13602 SDValue NewValue =
13603 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13604
13605 // If we know the exact VLEN and our fixed length vector completely fills
13606 // the container, use a whole register store instead.
13607 const auto [MinVLMAX, MaxVLMAX] =
13608 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13609 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13610 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13611 MachineMemOperand *MMO = Store->getMemOperand();
13612 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
13613 MMO->getPointerInfo(), MMO->getBaseAlign(),
13614 MMO->getFlags(), MMO->getAAInfo());
13615 }
13616
13617 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13618
13619 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13620 SDValue IntID = DAG.getTargetConstant(
13621 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
13622 return DAG.getMemIntrinsicNode(
13623 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
13624 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
13625 Store->getMemoryVT(), Store->getMemOperand());
13626}
13627
// Lower ISD::MLOAD and ISD::VP_LOAD to the riscv_vle/riscv_vle_mask
// intrinsics. Expanding loads are implemented as a dense masked load followed
// by a viota+vrgather to scatter the loaded elements to their mask positions.
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  // VP loads carry an explicit vector length and have no passthru; masked
  // loads may carry a passthru and an expanding-load flag.
  SDValue Mask, PassThru, VL;
  bool IsExpandingLoad = false;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
    IsExpandingLoad = MLoad->isExpandingLoad();
  }

  // An all-ones mask lets us use the unmasked vle form.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // For an expanding load, only popcount(mask) elements are read from memory;
  // keep the original VL for the later expansion gather.
  SDValue ExpandingVL;
  if (!IsUnmasked && IsExpandingLoad) {
    ExpandingVL = VL;
    VL =
        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
  }

  // Expanding loads read contiguously, so they use the unmasked vle too.
  unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
                                                 : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IntID == Intrinsic::riscv_vle)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);
  if (ExpandingVL) {
    // Scatter the densely-loaded elements back to their mask positions with
    // vrgather indexed by viota(mask).
    MVT IndexVT = ContainerVT;
    if (ContainerVT.isFloatingPoint())
      IndexVT = ContainerVT.changeVectorElementTypeToInteger();

    MVT IndexEltVT = IndexVT.getVectorElementType();
    bool UseVRGATHEREI16 = false;
    // If index vector is an i8 vector and the element count exceeds 256, we
    // should change the element type of index vector to i16 to avoid
    // overflow.
    if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
      // FIXME: We need to do vector splitting manually for LMUL=8 cases.
      assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
      UseVRGATHEREI16 = true;
    }

    SDValue Iota =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
                    DAG.getTargetConstant(Intrinsic::riscv_viota, DL, XLenVT),
                    DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
    Result =
        DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
                                    : RISCVISD::VRGATHER_VV_VL,
                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
  }

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
13728
13729SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
13730 SDLoc DL(Op);
13731 MVT VT = Op->getSimpleValueType(0);
13732
13733 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
13734 EVT MemVT = VPLoadFF->getMemoryVT();
13735 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
13736 SDValue Chain = VPLoadFF->getChain();
13737 SDValue BasePtr = VPLoadFF->getBasePtr();
13738
13739 SDValue Mask = VPLoadFF->getMask();
13740 SDValue VL = VPLoadFF->getVectorLength();
13741
13742 MVT XLenVT = Subtarget.getXLenVT();
13743
13744 MVT ContainerVT = VT;
13745 if (VT.isFixedLengthVector()) {
13746 ContainerVT = getContainerForFixedLengthVector(VT);
13747 MVT MaskVT = getMaskTypeFor(ContainerVT);
13748 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13749 }
13750
13751 unsigned IntID = Intrinsic::riscv_vleff_mask;
13752 SDValue Ops[] = {
13753 Chain,
13754 DAG.getTargetConstant(IntID, DL, XLenVT),
13755 DAG.getUNDEF(ContainerVT),
13756 BasePtr,
13757 Mask,
13758 VL,
13760
13761 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
13762
13763 SDValue Result =
13764 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13765 SDValue OutVL = Result.getValue(1);
13766 Chain = Result.getValue(2);
13767
13768 if (VT.isFixedLengthVector())
13769 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13770
13771 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
13772}
13773
13774SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
13775 SelectionDAG &DAG) const {
13776 SDLoc DL(Op);
13777
13778 const auto *MemSD = cast<MemSDNode>(Op);
13779 EVT MemVT = MemSD->getMemoryVT();
13780 MachineMemOperand *MMO = MemSD->getMemOperand();
13781 SDValue Chain = MemSD->getChain();
13782 SDValue BasePtr = MemSD->getBasePtr();
13783 SDValue Val, Mask, VL;
13784
13785 bool IsCompressingStore = false;
13786 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
13787 Val = VPStore->getValue();
13788 Mask = VPStore->getMask();
13789 VL = VPStore->getVectorLength();
13790 } else {
13791 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13792 Val = MStore->getValue();
13793 Mask = MStore->getMask();
13794 IsCompressingStore = MStore->isCompressingStore();
13795 }
13796
13797 bool IsUnmasked =
13798 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13799
13800 MVT VT = Val.getSimpleValueType();
13801 MVT XLenVT = Subtarget.getXLenVT();
13802
13803 MVT ContainerVT = VT;
13804 if (VT.isFixedLengthVector()) {
13805 ContainerVT = getContainerForFixedLengthVector(VT);
13806
13807 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13808 if (!IsUnmasked || IsCompressingStore) {
13809 MVT MaskVT = getMaskTypeFor(ContainerVT);
13810 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13811 }
13812 }
13813
13814 if (!VL)
13815 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13816
13817 if (IsCompressingStore) {
13818 Val = DAG.getNode(
13819 ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13820 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13821 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13822 VL =
13823 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13824 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13825 }
13826
13827 unsigned IntID =
13828 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13829 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13830 Ops.push_back(Val);
13831 Ops.push_back(BasePtr);
13832 if (!IsUnmasked)
13833 Ops.push_back(Mask);
13834 Ops.push_back(VL);
13835
13837 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13838}
13839
13840SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13841 SelectionDAG &DAG) const {
13842 SDLoc DL(Op);
13843 SDValue Val = Op.getOperand(0);
13844 SDValue Mask = Op.getOperand(1);
13845 SDValue Passthru = Op.getOperand(2);
13846
13847 MVT VT = Val.getSimpleValueType();
13848 MVT XLenVT = Subtarget.getXLenVT();
13849 MVT ContainerVT = VT;
13850 if (VT.isFixedLengthVector()) {
13851 ContainerVT = getContainerForFixedLengthVector(VT);
13852 MVT MaskVT = getMaskTypeFor(ContainerVT);
13853 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13854 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13855 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13856 }
13857
13858 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13859 SDValue Res =
13860 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13861 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13862 Passthru, Val, Mask, VL);
13863
13864 if (VT.isFixedLengthVector())
13865 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13866
13867 return Res;
13868}
13869
// Lower STRICT_FSETCC/STRICT_FSETCCS on vectors. Condition codes that RVV's
// quiet/signaling compare instructions cannot express directly (OEQ/UNE for
// the signaling form; LT/LE for the quiet form) are expanded into compare
// combinations that preserve the required FP-exception behavior.
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
  // condition code.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      // Join both compare chains so neither is dropped.
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  // Fixed-length inputs are compared inside their scalable container type.
  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
    // active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    // x == x is false only for NaN, so these self-compares detect ordered
    // lanes without raising exceptions.
    SDValue OrderMask1 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    SDValue OrderMask2 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    Mask =
        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
    // Use Mask as the passthru operand to let the result be 0 if either of the
    // inputs is unordered.
    Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
                      DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
  } else {
    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
                                                : RISCVISD::STRICT_FSETCCS_VL;
    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
  }

  if (VT.isFixedLengthVector()) {
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
13957
13958// Lower vector ABS to smax(X, sub(0, X)).
13959SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13960 SDLoc DL(Op);
13961 MVT VT = Op.getSimpleValueType();
13962 SDValue X = Op.getOperand(0);
13963
13964 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13965 "Unexpected type for ISD::ABS");
13966
13967 MVT ContainerVT = VT;
13968 if (VT.isFixedLengthVector()) {
13969 ContainerVT = getContainerForFixedLengthVector(VT);
13970 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13971 }
13972
13973 SDValue Mask, VL;
13974 if (Op->getOpcode() == ISD::VP_ABS) {
13975 Mask = Op->getOperand(1);
13976 if (VT.isFixedLengthVector())
13977 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13978 Subtarget);
13979 VL = Op->getOperand(2);
13980 } else
13981 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13982
13984 if (Subtarget.hasStdExtZvabd()) {
13985 Result = DAG.getNode(RISCVISD::ABS_VL, DL, ContainerVT, X,
13986 DAG.getUNDEF(ContainerVT), Mask, VL);
13987 } else {
13988 SDValue SplatZero = DAG.getNode(
13989 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13990 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13991 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13992 DAG.getUNDEF(ContainerVT), Mask, VL);
13993 Result = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13994 DAG.getUNDEF(ContainerVT), Mask, VL);
13995 }
13996 if (VT.isFixedLengthVector())
13997 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13998 return Result;
13999}
14000
// Lower an operation on fixed-length vectors to the equivalent
// RISCVISD::*_VL node on scalable container types: every fixed-length vector
// operand is "cast" to its container, and the extra passthru/mask/VL operands
// that the VL opcode expects are appended before building the node. The
// scalable result is converted back to the original fixed-length type.
14001SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
14002 SelectionDAG &DAG) const {
14003 const auto &TSInfo =
14004 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
14005
// Map the generic opcode to its VL counterpart and query which trailing
// operands (passthru, mask) that opcode takes.
14006 unsigned NewOpc = getRISCVVLOp(Op);
14007 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
14008 bool HasMask = TSInfo.hasMaskOp(NewOpc);
14009
14010 MVT VT = Op.getSimpleValueType();
14011 MVT ContainerVT = getContainerForFixedLengthVector(VT);
14012
14013 // Create list of operands by converting existing ones to scalable types.
14015 for (const SDValue &V : Op->op_values()) {
14016 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
14017
14018 // Pass through non-vector operands.
14019 if (!V.getValueType().isVector()) {
14020 Ops.push_back(V);
14021 continue;
14022 }
14023
14024 // "cast" fixed length vector to a scalable vector.
14025 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
14026 "Only fixed length vectors are supported!");
// An operand's element type may differ from the result's (e.g. compares),
// so rebuild the container type around this operand's element type.
14027 MVT VContainerVT = ContainerVT.changeVectorElementType(
14028 V.getSimpleValueType().getVectorElementType());
14029 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
14030 }
14031
14032 SDLoc DL(Op);
14033 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
// Append the VL-node trailing operands in canonical order:
// passthru (undef), then mask, then VL.
14034 if (HasPassthruOp)
14035 Ops.push_back(DAG.getUNDEF(ContainerVT));
14036 if (HasMask)
14037 Ops.push_back(Mask);
14038 Ops.push_back(VL);
14039
14040 // StrictFP operations have two result values. Their lowered result should
14041 // have same result count.
14042 if (Op->isStrictFPOpcode()) {
14043 SDValue ScalableRes =
14044 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
14045 Op->getFlags());
14046 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
14047 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
14048 }
14049
14050 SDValue ScalableRes =
14051 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
14052 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
14053}
14054
14055// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
14056// * Operands of each node are assumed to be in the same order.
14057// * The EVL operand is promoted from i32 to i64 on RV64.
14058// * Fixed-length vectors are converted to their scalable-vector container
14059// types.
14060SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
14061 const auto &TSInfo =
14062 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
14063
14064 unsigned RISCVISDOpc = getRISCVVLOp(Op);
14065 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
14066
14067 SDLoc DL(Op);
14068 MVT VT = Op.getSimpleValueType();
14070
14071 MVT ContainerVT = VT;
14072 if (VT.isFixedLengthVector())
14073 ContainerVT = getContainerForFixedLengthVector(VT);
14074
// Walk the VP node's operands in order, inserting the extra operands the VL
// node needs (passthru, rounding mode) at the right positions.
14075 for (const auto &OpIdx : enumerate(Op->ops())) {
14076 SDValue V = OpIdx.value();
14077 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
14078 // Add dummy passthru value before the mask. Or if there isn't a mask,
14079 // before EVL.
14080 if (HasPassthruOp) {
14081 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
14082 if (MaskIdx) {
14083 if (*MaskIdx == OpIdx.index())
14084 Ops.push_back(DAG.getUNDEF(ContainerVT));
14085 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
14086 OpIdx.index()) {
14087 if (Op.getOpcode() == ISD::VP_MERGE) {
14088 // For VP_MERGE, copy the false operand instead of an undef value.
14089 Ops.push_back(Ops.back());
14090 } else {
14091 assert(Op.getOpcode() == ISD::VP_SELECT);
14092 // For VP_SELECT, add an undef value.
14093 Ops.push_back(DAG.getUNDEF(ContainerVT));
14094 }
14095 }
14096 }
14097 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
14098 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
14099 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
14101 Subtarget.getXLenVT()));
14102 // Pass through operands which aren't fixed-length vectors.
14103 if (!V.getValueType().isFixedLengthVector()) {
14104 Ops.push_back(V);
14105 continue;
14106 }
14107 // "cast" fixed length vector to a scalable vector.
// NOTE: this inner ContainerVT intentionally shadows the outer one — the
// operand's element type may differ from the result's.
14108 MVT OpVT = V.getSimpleValueType();
14109 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
14110 assert(useRVVForFixedLengthVectorVT(OpVT) &&
14111 "Only fixed length vectors are supported!");
14112 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
14113 }
14114
// Scalable result needs no conversion; fixed-length results are converted
// back from the container type.
14115 if (!VT.isFixedLengthVector())
14116 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
14117
14118 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
14119
14120 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
14121}
14122
14123SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
14124 SelectionDAG &DAG) const {
14125 SDLoc DL(Op);
14126 MVT VT = Op.getSimpleValueType();
14127
14128 SDValue Src = Op.getOperand(0);
14129 // NOTE: Mask is dropped.
14130 SDValue VL = Op.getOperand(2);
14131
14132 MVT ContainerVT = VT;
14133 if (VT.isFixedLengthVector()) {
14134 ContainerVT = getContainerForFixedLengthVector(VT);
14135 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
14136 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
14137 }
14138
14139 MVT XLenVT = Subtarget.getXLenVT();
14140 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
14141 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14142 DAG.getUNDEF(ContainerVT), Zero, VL);
14143
14144 SDValue SplatValue = DAG.getSignedConstant(
14145 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
14146 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14147 DAG.getUNDEF(ContainerVT), SplatValue, VL);
14148
14149 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
14150 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
14151 if (!VT.isFixedLengthVector())
14152 return Result;
14153 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14154}
14155
// Lower a VP setcc whose operands are mask (i1) vectors to RVV mask-register
// logic ops (vmxor/vmand/vmor). Each comparison of booleans reduces to a
// two-instruction (at most) mask expression; the per-case mappings are
// documented inline below. The VP mask operand is dropped — masked-off
// lanes are undef.
14156SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
14157 SelectionDAG &DAG) const {
14158 SDLoc DL(Op);
14159 MVT VT = Op.getSimpleValueType();
14160
14161 SDValue Op1 = Op.getOperand(0);
14162 SDValue Op2 = Op.getOperand(1);
14163 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
14164 // NOTE: Mask is dropped.
14165 SDValue VL = Op.getOperand(4);
14166
14167 MVT ContainerVT = VT;
14168 if (VT.isFixedLengthVector()) {
14169 ContainerVT = getContainerForFixedLengthVector(VT);
14170 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14171 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14172 }
14173
// All-ones mask used to express logical NOT as xor-with-ones.
14175 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14176
14177 switch (Condition) {
14178 default:
14179 break;
14180 // X != Y --> (X^Y)
14181 case ISD::SETNE:
14182 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
14183 break;
14184 // X == Y --> ~(X^Y)
14185 case ISD::SETEQ: {
14186 SDValue Temp =
14187 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
14188 Result =
14189 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
14190 break;
14191 }
14192 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
14193 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
14194 case ISD::SETGT:
14195 case ISD::SETULT: {
14196 SDValue Temp =
14197 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
14198 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
14199 break;
14200 }
14201 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
14202 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
14203 case ISD::SETLT:
14204 case ISD::SETUGT: {
14205 SDValue Temp =
14206 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
14207 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
14208 break;
14209 }
14210 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
14211 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
14212 case ISD::SETGE:
14213 case ISD::SETULE: {
14214 SDValue Temp =
14215 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
14216 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
14217 break;
14218 }
14219 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
14220 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
14221 case ISD::SETLE:
14222 case ISD::SETUGE: {
14223 SDValue Temp =
14224 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
14225 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
14226 break;
14227 }
14228 }
14229
14230 if (!VT.isFixedLengthVector())
14231 return Result;
14232 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14233}
14234
14235// Lower Floating-Point/Integer Type-Convert VP SDNodes
// Handles all four widths/direction combinations: the destination element
// may be the same size, wider, or narrower than the source, and either side
// may be integer or floating point. Multi-step conversions go through an
// interim type (at most one extra step is needed per the cases below).
14236SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
14237 SelectionDAG &DAG) const {
14238 SDLoc DL(Op);
14239
14240 SDValue Src = Op.getOperand(0);
14241 SDValue Mask = Op.getOperand(1);
14242 SDValue VL = Op.getOperand(2);
14243 unsigned RISCVISDOpc = getRISCVVLOp(Op);
14244
14245 MVT DstVT = Op.getSimpleValueType();
14246 MVT SrcVT = Src.getSimpleValueType();
14247 if (DstVT.isFixedLengthVector()) {
14248 DstVT = getContainerForFixedLengthVector(DstVT);
14249 SrcVT = getContainerForFixedLengthVector(SrcVT);
14250 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
14251 MVT MaskVT = getMaskTypeFor(DstVT);
14252 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14253 }
14254
14255 unsigned DstEltSize = DstVT.getScalarSizeInBits();
14256 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
14257
14259 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
14260 if (SrcVT.isInteger()) {
14261 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
14262
// Sign of the conversion dictates the flavor of pre-extension.
14263 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
14264 ? RISCVISD::VSEXT_VL
14265 : RISCVISD::VZEXT_VL;
14266
14267 // Do we need to do any pre-widening before converting?
14268 if (SrcEltSize == 1) {
// i1 sources can't be extended directly: select 0/1 (or 0/-1 for sext)
// into a full-width integer vector first.
14269 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
14270 MVT XLenVT = Subtarget.getXLenVT();
14271 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
14272 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
14273 DAG.getUNDEF(IntVT), Zero, VL);
14274 SDValue One = DAG.getSignedConstant(
14275 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
14276 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
14277 DAG.getUNDEF(IntVT), One, VL);
14278 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
14279 ZeroSplat, DAG.getUNDEF(IntVT), VL);
14280 } else if (DstEltSize > (2 * SrcEltSize)) {
14281 // Widen before converting.
14282 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
14283 DstVT.getVectorElementCount());
14284 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
14285 }
14286
14287 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
14288 } else {
14289 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
14290 "Wrong input/output vector types");
14291
14292 // Convert f16 to f32 then convert f32 to i64.
14293 if (DstEltSize > (2 * SrcEltSize)) {
14294 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
14295 MVT InterimFVT =
14296 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
14297 Src =
14298 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
14299 }
14300
14301 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
14302 }
14303 } else { // Narrowing + Conversion
14304 if (SrcVT.isInteger()) {
14305 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
14306 // First do a narrowing convert to an FP type half the size, then round
14307 // the FP type to a small FP type if needed.
14308
14309 MVT InterimFVT = DstVT;
14310 if (SrcEltSize > (2 * DstEltSize)) {
14311 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
14312 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
14313 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
14314 }
14315
14316 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
14317
14318 if (InterimFVT != DstVT) {
14319 Src = Result;
14320 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
14321 }
14322 } else {
14323 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
14324 "Wrong input/output vector types");
14325 // First do a narrowing conversion to an integer half the size, then
14326 // truncate if needed.
14327
14328 if (DstEltSize == 1) {
14329 // First convert to the same size integer, then convert to mask using
14330 // setcc.
14331 assert(SrcEltSize >= 16 && "Unexpected FP type!");
14332 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
14333 DstVT.getVectorElementCount());
14334 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14335
14336 // Compare the integer result to 0. The integer should be 0 or 1/-1,
14337 // otherwise the conversion was undefined.
14338 MVT XLenVT = Subtarget.getXLenVT();
14339 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
14340 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
14341 DAG.getUNDEF(InterimIVT), SplatZero, VL);
14342 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
14343 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
14344 DAG.getUNDEF(DstVT), Mask, VL});
14345 } else {
14346 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14347 DstVT.getVectorElementCount());
14348
14349 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14350
// Keep halving the integer width with vnsrl-style truncates until the
// destination element width is reached.
14351 while (InterimIVT != DstVT) {
14352 SrcEltSize /= 2;
14353 Src = Result;
14354 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14355 DstVT.getVectorElementCount());
14356 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
14357 Src, Mask, VL);
14358 }
14359 }
14360 }
14361 }
14362
14363 MVT VT = Op.getSimpleValueType();
14364 if (!VT.isFixedLengthVector())
14365 return Result;
14366 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14367}
14368
// Lower VP_MERGE on mask (i1) vectors. There is no mask-register vmerge, so
// the i1 operands are promoted to i8 vectors, merged with vmerge.vvm, and the
// result is converted back to a mask by comparing against zero.
14369SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
14370 SelectionDAG &DAG) const {
14371 SDLoc DL(Op);
14372 MVT VT = Op.getSimpleValueType();
14373 MVT XLenVT = Subtarget.getXLenVT();
14374
14375 SDValue Mask = Op.getOperand(0);
14376 SDValue TrueVal = Op.getOperand(1);
14377 SDValue FalseVal = Op.getOperand(2);
14378 SDValue VL = Op.getOperand(3);
14379
14380 // Use default legalization if a vector of EVL type would be legal.
14381 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
14383 if (isTypeLegal(EVLVecVT))
14384 return SDValue();
14385
14386 MVT ContainerVT = VT;
14387 if (VT.isFixedLengthVector()) {
14388 ContainerVT = getContainerForFixedLengthVector(VT);
14389 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
14390 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
14391 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
14392 }
14393
14394 // Promote to a vector of i8.
14395 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
14396
14397 // Promote TrueVal and FalseVal using VLMax.
14398 // FIXME: Is there a better way to do this?
14399 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
14400 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14401 DAG.getUNDEF(PromotedVT),
14402 DAG.getConstant(1, DL, XLenVT), VLMax);
14403 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14404 DAG.getUNDEF(PromotedVT),
14405 DAG.getConstant(0, DL, XLenVT), VLMax);
14406 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
14407 SplatZero, DAG.getUNDEF(PromotedVT), VL);
14408 // Any element past VL uses FalseVal, so use VLMax
14409 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
14410 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
14411
14412 // VP_MERGE the two promoted values.
// FalseVal also serves as the passthru, so lanes past VL keep FalseVal.
14413 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
14414 TrueVal, FalseVal, FalseVal, VL);
14415
14416 // Convert back to mask.
14417 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14418 SDValue Result = DAG.getNode(
14419 RISCVISD::SETCC_VL, DL, ContainerVT,
14420 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
14421 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
14422
14423 if (VT.isFixedLengthVector())
14424 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14425 return Result;
14426}
14427
// Lower experimental VP_SPLICE: concatenate the low EVL1 elements of Op1
// with Op2 shifted in, using a slidedown of Op1 followed by a slideup of
// Op2. Mask vectors are first widened to i8 and narrowed back at the end.
// A splice of a single scalar element (Offset == 0, EVL1 == 1) is matched
// to a cheaper vslide1up.
14428SDValue
14429RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
14430 SelectionDAG &DAG) const {
14431 using namespace SDPatternMatch;
14432
14433 SDLoc DL(Op);
14434
14435 SDValue Op1 = Op.getOperand(0);
14436 SDValue Op2 = Op.getOperand(1);
14437 SDValue Offset = Op.getOperand(2);
14438 SDValue Mask = Op.getOperand(3);
14439 SDValue EVL1 = Op.getOperand(4);
14440 SDValue EVL2 = Op.getOperand(5);
14441
14442 const MVT XLenVT = Subtarget.getXLenVT();
14443 MVT VT = Op.getSimpleValueType();
14444 MVT ContainerVT = VT;
14445 if (VT.isFixedLengthVector()) {
14446 ContainerVT = getContainerForFixedLengthVector(VT);
14447 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14448 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14449 MVT MaskVT = getMaskTypeFor(ContainerVT);
14450 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14451 }
14452
14453 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
14454 if (IsMaskVector) {
// Mask vectors can't be slid directly; promote each operand to 0/1 in i8.
14455 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
14456
14457 // Expand input operands
14458 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14459 DAG.getUNDEF(ContainerVT),
14460 DAG.getConstant(1, DL, XLenVT), EVL1);
14461 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14462 DAG.getUNDEF(ContainerVT),
14463 DAG.getConstant(0, DL, XLenVT), EVL1);
14464 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
14465 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
14466
14467 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14468 DAG.getUNDEF(ContainerVT),
14469 DAG.getConstant(1, DL, XLenVT), EVL2);
14470 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14471 DAG.getUNDEF(ContainerVT),
14472 DAG.getConstant(0, DL, XLenVT), EVL2);
14473 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
14474 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
14475 }
14476
// Peel the scalar being spliced in, if Op1 is a splat or an insert into
// element zero.
14477 auto getVectorFirstEle = [](SDValue Vec) {
14478 SDValue FirstEle;
14479 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
14480 return FirstEle;
14481
14482 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
14484 return Vec.getOperand(0);
14485
14486 return SDValue();
14487 };
14488
// Fast path: splicing a single element in front of Op2 is a vslide1up.
14489 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
14490 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
14491 MVT EltVT = ContainerVT.getVectorElementType();
// f16/bf16 without vector FP16 support go through the integer domain.
14493 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
14494 EltVT == MVT::bf16) {
14495 EltVT = EltVT.changeTypeToInteger();
14496 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
14497 Op2 = DAG.getBitcast(ContainerVT, Op2);
14498 FirstEle =
14499 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
14500 }
14501 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
14502 : RISCVISD::VSLIDE1UP_VL,
14503 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
14504 FirstEle, Mask, EVL2);
14505 Result = DAG.getBitcast(
14507 Result);
14508 return VT.isFixedLengthVector()
14509 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
14510 : Result;
14511 }
14512
// General case: split the splice into a slidedown of Op1 by Offset and a
// slideup of Op2 by EVL1 - Offset.
14513 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
14514 SDValue DownOffset, UpOffset;
14515 if (ImmValue >= 0) {
14516 // The operand is a TargetConstant, we need to rebuild it as a regular
14517 // constant.
14518 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
14519 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
14520 } else {
14521 // The operand is a TargetConstant, we need to rebuild it as a regular
14522 // constant rather than negating the original operand.
14523 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
14524 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
14525 }
14526
14527 if (ImmValue != 0)
14528 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14529 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
14530 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
14531 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
14532 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
14533
14534 if (IsMaskVector) {
14535 // Truncate Result back to a mask vector (Result has same EVL as Op2)
14536 Result = DAG.getNode(
14537 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
14538 {Result, DAG.getConstant(0, DL, ContainerVT),
14539 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
14540 Mask, EVL2});
14541 }
14542
14543 if (!VT.isFixedLengthVector())
14544 return Result;
14545 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14546}
14547
// Lower experimental VP_REVERSE: reverse the first EVL elements of the
// source with a vrgather through descending indices (EVL-1 - vid). Mask
// vectors are widened to i8 first. For SEW=8 with a potentially large VLMAX
// the indices may not fit in i8, so either the vector is split and reversed
// per half (LMUL=8) or the index type is promoted to i16 for
// vrgatherei16.vv.
14548SDValue
14549RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
14550 SelectionDAG &DAG) const {
14551 SDLoc DL(Op);
14552 MVT VT = Op.getSimpleValueType();
14553 MVT XLenVT = Subtarget.getXLenVT();
14554
14555 SDValue Op1 = Op.getOperand(0);
14556 SDValue Mask = Op.getOperand(1);
14557 SDValue EVL = Op.getOperand(2);
14558
14559 MVT ContainerVT = VT;
14560 if (VT.isFixedLengthVector()) {
14561 ContainerVT = getContainerForFixedLengthVector(VT);
14562 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14563 MVT MaskVT = getMaskTypeFor(ContainerVT);
14564 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14565 }
14566
14567 MVT GatherVT = ContainerVT;
14568 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
14569 // Check if we are working with mask vectors
14570 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
14571 if (IsMaskVector) {
14572 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
14573
14574 // Expand input operand
14575 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14576 DAG.getUNDEF(IndicesVT),
14577 DAG.getConstant(1, DL, XLenVT), EVL);
14578 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14579 DAG.getUNDEF(IndicesVT),
14580 DAG.getConstant(0, DL, XLenVT), EVL);
14581 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
14582 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
14583 }
14584
14585 unsigned EltSize = GatherVT.getScalarSizeInBits();
14586 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
14587 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
14588 unsigned MaxVLMAX =
14589 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
14590
14591 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
14592 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
14593 // to use vrgatherei16.vv.
14594 // TODO: It's also possible to use vrgatherei16.vv for other types to
14595 // decrease register width for the index calculation.
14596 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
14597 if (MaxVLMAX > 256 && EltSize == 8) {
14598 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
14599 // Split the vector in half and reverse each half using a full register
14600 // reverse.
14601 // Swap the halves and concatenate them.
14602 // Slide the concatenated result by (VLMax - VL).
14603 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
14604 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
14605 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
14606
14607 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
14608 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
14609
14610 // Reassemble the low and high pieces reversed.
14611 // NOTE: this Result is unmasked (because we do not need masks for
14612 // shuffles). If in the future this has to change, we can use a SELECT_VL
14613 // between Result and UNDEF using the mask originally passed to VP_REVERSE
14614 SDValue Result =
14615 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
14616
14617 // Slide off any elements from past EVL that were reversed into the low
14618 // elements.
14619 SDValue VLMax =
14620 DAG.getElementCount(DL, XLenVT, GatherVT.getVectorElementCount());
14621 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
14622
14623 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
14624 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
14625
14626 if (IsMaskVector) {
14627 // Truncate Result back to a mask vector
14628 Result =
14629 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
14630 {Result, DAG.getConstant(0, DL, GatherVT),
14632 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14633 }
14634
14635 if (!VT.isFixedLengthVector())
14636 return Result;
14637 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14638 }
14639
14640 // Just promote the int type to i16 which will double the LMUL.
14641 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
14642 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
14643 }
14644
// General path: indices = (EVL - 1) - vid, then gather.
14645 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
14646 SDValue VecLen =
14647 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
14648 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14649 DAG.getUNDEF(IndicesVT), VecLen, EVL);
14650 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
14651 DAG.getUNDEF(IndicesVT), Mask, EVL);
14652 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
14653 DAG.getUNDEF(GatherVT), Mask, EVL);
14654
14655 if (IsMaskVector) {
14656 // Truncate Result back to a mask vector
14657 Result = DAG.getNode(
14658 RISCVISD::SETCC_VL, DL, ContainerVT,
14659 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
14660 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14661 }
14662
14663 if (!VT.isFixedLengthVector())
14664 return Result;
14665 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14666}
14667
14668SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
14669 SelectionDAG &DAG) const {
14670 MVT VT = Op.getSimpleValueType();
14671 if (VT.getVectorElementType() != MVT::i1)
14672 return lowerVPOp(Op, DAG);
14673
14674 // It is safe to drop mask parameter as masked-off elements are undef.
14675 SDValue Op1 = Op->getOperand(0);
14676 SDValue Op2 = Op->getOperand(1);
14677 SDValue VL = Op->getOperand(3);
14678
14679 MVT ContainerVT = VT;
14680 const bool IsFixed = VT.isFixedLengthVector();
14681 if (IsFixed) {
14682 ContainerVT = getContainerForFixedLengthVector(VT);
14683 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14684 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14685 }
14686
14687 SDLoc DL(Op);
14688 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
14689 if (!IsFixed)
14690 return Val;
14691 return convertFromScalableVector(VT, Val, DAG, Subtarget);
14692}
14693
// Lower VP_STRIDED_LOAD to a riscv_vlse / riscv_vlse_mask memory intrinsic.
// An all-ones mask selects the cheaper unmasked form; fixed-length results
// and masks are routed through scalable container types.
14694SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
14695 SelectionDAG &DAG) const {
14696 SDLoc DL(Op);
14697 MVT XLenVT = Subtarget.getXLenVT();
14698 MVT VT = Op.getSimpleValueType();
14699 MVT ContainerVT = VT;
14700 if (VT.isFixedLengthVector())
14701 ContainerVT = getContainerForFixedLengthVector(VT);
14702
14703 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14704
14705 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
14706 // Check if the mask is known to be all ones
14707 SDValue Mask = VPNode->getMask();
14708 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14709
14710 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
14711 : Intrinsic::riscv_vlse_mask,
14712 DL, XLenVT);
// Intrinsic operand order: chain, id, passthru (undef), base pointer,
// stride, [mask], VL, [policy].
14713 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
14714 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
14715 VPNode->getStride()};
14716 if (!IsUnmasked) {
14717 if (VT.isFixedLengthVector()) {
14718 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14719 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14720 }
14721 Ops.push_back(Mask);
14722 }
14723 Ops.push_back(VPNode->getVectorLength());
14724 if (!IsUnmasked) {
// The masked form additionally takes a tail/mask policy operand.
14725 SDValue Policy =
14727 Ops.push_back(Policy);
14728 }
14729
14730 SDValue Result =
14732 VPNode->getMemoryVT(), VPNode->getMemOperand());
14733 SDValue Chain = Result.getValue(1);
14734
14735 if (VT.isFixedLengthVector())
14736 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14737
14738 return DAG.getMergeValues({Result, Chain}, DL);
14739}
14740
14741SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
14742 SelectionDAG &DAG) const {
14743 SDLoc DL(Op);
14744 MVT XLenVT = Subtarget.getXLenVT();
14745
14746 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
14747 SDValue StoreVal = VPNode->getValue();
14748 MVT VT = StoreVal.getSimpleValueType();
14749 MVT ContainerVT = VT;
14750 if (VT.isFixedLengthVector()) {
14751 ContainerVT = getContainerForFixedLengthVector(VT);
14752 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
14753 }
14754
14755 // Check if the mask is known to be all ones
14756 SDValue Mask = VPNode->getMask();
14757 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14758
14759 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14760 : Intrinsic::riscv_vsse_mask,
14761 DL, XLenVT);
14762 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14763 VPNode->getBasePtr(), VPNode->getStride()};
14764 if (!IsUnmasked) {
14765 if (VT.isFixedLengthVector()) {
14766 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14767 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14768 }
14769 Ops.push_back(Mask);
14770 }
14771 Ops.push_back(VPNode->getVectorLength());
14772
14773 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14774 Ops, VPNode->getMemoryVT(),
14775 VPNode->getMemOperand());
14776}
14777
14778// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14779// matched to a RVV indexed load. The RVV indexed load instructions only
14780// support the "unsigned unscaled" addressing mode; indices are implicitly
14781// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14782// signed or scaled indexing is extended to the XLEN value type and scaled
14783// accordingly.
14784SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14785 SelectionDAG &DAG) const {
14786 SDLoc DL(Op);
14787 MVT VT = Op.getSimpleValueType();
14788
14789 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14790 EVT MemVT = MemSD->getMemoryVT();
14791 MachineMemOperand *MMO = MemSD->getMemOperand();
14792 SDValue Chain = MemSD->getChain();
14793 SDValue BasePtr = MemSD->getBasePtr();
14794
14795 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14796 SDValue Index, Mask, PassThru, VL;
14797
// Pull the operands out of whichever node flavor this is; only VP_GATHER
// carries an explicit VL.
14798 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14799 Index = VPGN->getIndex();
14800 Mask = VPGN->getMask();
14801 PassThru = DAG.getUNDEF(VT);
14802 VL = VPGN->getVectorLength();
14803 // VP doesn't support extending loads.
14805 } else {
14806 // Else it must be a MGATHER.
14807 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14808 Index = MGN->getIndex();
14809 Mask = MGN->getMask();
14810 PassThru = MGN->getPassThru();
14811 LoadExtType = MGN->getExtensionType();
14812 }
14813
14814 MVT IndexVT = Index.getSimpleValueType();
14815 MVT XLenVT = Subtarget.getXLenVT();
14816
14818 "Unexpected VTs!");
14819 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14820 // Targets have to explicitly opt-in for extending vector loads.
14821 assert(LoadExtType == ISD::NON_EXTLOAD &&
14822 "Unexpected extending MGATHER/VP_GATHER");
14823
14824 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14825 // the selection of the masked intrinsics doesn't do this for us.
14826 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14827
14828 MVT ContainerVT = VT;
14829 if (VT.isFixedLengthVector()) {
14830 ContainerVT = getContainerForFixedLengthVector(VT);
// Keep the index element type but adopt the container's element count.
14831 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14832 ContainerVT.getVectorElementCount());
14833
14834 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14835
14836 if (!IsUnmasked) {
14837 MVT MaskVT = getMaskTypeFor(ContainerVT);
14838 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14839 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14840 }
14841 }
14842
// MGATHER has no VL; use the default VL for the container type.
14843 if (!VL)
14844 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14845
// Indices wider than XLEN are truncated (they are treated as byte offsets
// zero-extended/truncated to XLEN anyway, per the header comment).
14846 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14847 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14848 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14849 }
14850
14851 unsigned IntID =
14852 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
// Intrinsic operand order: chain, id, passthru, base, index, [mask], VL,
// [policy].
14853 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14854 if (IsUnmasked)
14855 Ops.push_back(DAG.getUNDEF(ContainerVT));
14856 else
14857 Ops.push_back(PassThru);
14858 Ops.push_back(BasePtr);
14859 Ops.push_back(Index);
14860 if (!IsUnmasked)
14861 Ops.push_back(Mask);
14862 Ops.push_back(VL);
14863 if (!IsUnmasked)
14864 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14865
14866 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14867 SDValue Result =
14868 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14869 Chain = Result.getValue(1);
14870
14871 if (VT.isFixedLengthVector())
14872 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14873
14874 return DAG.getMergeValues({Result, Chain}, DL);
14875}
14876
14877// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14878// matched to a RVV indexed store. The RVV indexed store instructions only
14879// support the "unsigned unscaled" addressing mode; indices are implicitly
14880// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14881// signed or scaled indexing is extended to the XLEN value type and scaled
14882// accordingly.
14883SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14884 SelectionDAG &DAG) const {
14885 SDLoc DL(Op);
14886 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14887 EVT MemVT = MemSD->getMemoryVT();
14888 MachineMemOperand *MMO = MemSD->getMemOperand();
14889 SDValue Chain = MemSD->getChain();
14890 SDValue BasePtr = MemSD->getBasePtr();
14891
14892 [[maybe_unused]] bool IsTruncatingStore = false;
14893 SDValue Index, Mask, Val, VL;
14894
14895 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14896 Index = VPSN->getIndex();
14897 Mask = VPSN->getMask();
14898 Val = VPSN->getValue();
14899 VL = VPSN->getVectorLength();
14900 // VP doesn't support truncating stores.
14901 IsTruncatingStore = false;
14902 } else {
14903 // Else it must be a MSCATTER.
14904 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14905 Index = MSN->getIndex();
14906 Mask = MSN->getMask();
14907 Val = MSN->getValue();
14908 IsTruncatingStore = MSN->isTruncatingStore();
14909 }
14910
14911 MVT VT = Val.getSimpleValueType();
14912 MVT IndexVT = Index.getSimpleValueType();
14913 MVT XLenVT = Subtarget.getXLenVT();
14914
14916 "Unexpected VTs!");
14917 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14918 // Targets have to explicitly opt-in for extending vector loads and
14919 // truncating vector stores.
14920 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14921
14922 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14923 // the selection of the masked intrinsics doesn't do this for us.
14924 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14925
14926 MVT ContainerVT = VT;
14927 if (VT.isFixedLengthVector()) {
14928 ContainerVT = getContainerForFixedLengthVector(VT);
14929 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14930 ContainerVT.getVectorElementCount());
14931
14932 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14933 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14934
14935 if (!IsUnmasked) {
14936 MVT MaskVT = getMaskTypeFor(ContainerVT);
14937 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14938 }
14939 }
14940
14941 if (!VL)
14942 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14943
14944 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14945 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14946 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14947 }
14948
14949 unsigned IntID =
14950 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14951 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14952 Ops.push_back(Val);
14953 Ops.push_back(BasePtr);
14954 Ops.push_back(Index);
14955 if (!IsUnmasked)
14956 Ops.push_back(Mask);
14957 Ops.push_back(VL);
14958
14960 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14961}
14962
14963SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14964 SelectionDAG &DAG) const {
14965 const MVT XLenVT = Subtarget.getXLenVT();
14966 SDLoc DL(Op);
14967 SDValue Chain = Op->getOperand(0);
14968 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14969 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14970 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14971
14972 // Encoding used for rounding mode in RISC-V differs from that used in
14973 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
14974 // table, which consists of a sequence of 4-bit fields, each representing
14975 // corresponding FLT_ROUNDS mode.
14976 static const int Table =
14982
14983 SDValue Shift =
14984 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14985 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14986 DAG.getConstant(Table, DL, XLenVT), Shift);
14987 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14988 DAG.getConstant(7, DL, XLenVT));
14989
14990 return DAG.getMergeValues({Masked, Chain}, DL);
14991}
14992
14993SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14994 SelectionDAG &DAG) const {
14995 const MVT XLenVT = Subtarget.getXLenVT();
14996 SDLoc DL(Op);
14997 SDValue Chain = Op->getOperand(0);
14998 SDValue RMValue = Op->getOperand(1);
14999 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
15000
15001 // Encoding used for rounding mode in RISC-V differs from that used in
15002 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
15003 // a table, which consists of a sequence of 4-bit fields, each representing
15004 // corresponding RISC-V mode.
15005 static const unsigned Table =
15011
15012 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
15013
15014 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
15015 DAG.getConstant(2, DL, XLenVT));
15016 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
15017 DAG.getConstant(Table, DL, XLenVT), Shift);
15018 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
15019 DAG.getConstant(0x7, DL, XLenVT));
15020 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15021 RMValue);
15022}
15023
15024SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
15025 SelectionDAG &DAG) const {
15026 const MVT XLenVT = Subtarget.getXLenVT();
15027 SDLoc DL(Op);
15028 SDValue Chain = Op->getOperand(0);
15029 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15030 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
15031 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
15032}
15033
15034SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
15035 SelectionDAG &DAG) const {
15036 const MVT XLenVT = Subtarget.getXLenVT();
15037 SDLoc DL(Op);
15038 SDValue Chain = Op->getOperand(0);
15039 SDValue EnvValue = Op->getOperand(1);
15040 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15041
15042 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
15043 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15044 EnvValue);
15045}
15046
15047SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
15048 SelectionDAG &DAG) const {
15049 const MVT XLenVT = Subtarget.getXLenVT();
15050 SDLoc DL(Op);
15051 SDValue Chain = Op->getOperand(0);
15052 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
15053 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15054
15055 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15056 EnvValue);
15057}
15058
15061
15062SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
15063 SelectionDAG &DAG) const {
15064 const MVT XLenVT = Subtarget.getXLenVT();
15065 SDLoc DL(Op);
15066 SDValue Chain = Op->getOperand(0);
15067 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15068 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
15069 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
15070 Chain = Result.getValue(1);
15071 return DAG.getMergeValues({Result, Chain}, DL);
15072}
15073
15074SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
15075 SelectionDAG &DAG) const {
15076 const MVT XLenVT = Subtarget.getXLenVT();
15077 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
15078 SDLoc DL(Op);
15079 SDValue Chain = Op->getOperand(0);
15080 SDValue EnvValue = Op->getOperand(1);
15081 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15082 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
15083
15084 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
15085 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
15086 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
15087 ModeMask);
15088 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
15089 EnvValue);
15090}
15091
15092SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
15093 SelectionDAG &DAG) const {
15094 const MVT XLenVT = Subtarget.getXLenVT();
15095 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
15096 SDLoc DL(Op);
15097 SDValue Chain = Op->getOperand(0);
15098 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15099 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
15100
15101 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
15102 ModeMask);
15103}
15104
15105SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
15106 SelectionDAG &DAG) const {
15107 MachineFunction &MF = DAG.getMachineFunction();
15108
15109 bool isRISCV64 = Subtarget.is64Bit();
15110 EVT PtrVT = getPointerTy(DAG.getDataLayout());
15111
15112 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
15113 return DAG.getFrameIndex(FI, PtrVT);
15114}
15115
15116// Returns the opcode of the target-specific SDNode that implements the 32-bit
15117// form of the given Opcode.
15118static unsigned getRISCVWOpcode(unsigned Opcode) {
15119 switch (Opcode) {
15120 default:
15121 llvm_unreachable("Unexpected opcode");
15122 case ISD::SHL:
15123 return RISCVISD::SLLW;
15124 case ISD::SRA:
15125 return RISCVISD::SRAW;
15126 case ISD::SRL:
15127 return RISCVISD::SRLW;
15128 case ISD::SDIV:
15129 return RISCVISD::DIVW;
15130 case ISD::UDIV:
15131 return RISCVISD::DIVUW;
15132 case ISD::UREM:
15133 return RISCVISD::REMUW;
15134 case ISD::ROTL:
15135 return RISCVISD::ROLW;
15136 case ISD::ROTR:
15137 return RISCVISD::RORW;
15138 }
15139}
15140
15141// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
15142// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
15143// otherwise be promoted to i64, making it difficult to select the
15144// SLLW/DIVUW/.../*W later one because the fact the operation was originally of
15145// type i8/i16/i32 is lost.
15147 unsigned ExtOpc = ISD::ANY_EXTEND) {
15148 SDLoc DL(N);
15149 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
15150 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
15151 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
15152 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
15153 // ReplaceNodeResults requires we maintain the same type for the return value.
15154 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
15155}
15156
15157// Converts the given 32-bit operation to a i64 operation with signed extension
15158// semantic to reduce the signed extension instructions.
15160 SDLoc DL(N);
15161 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15162 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15163 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
15164 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15165 DAG.getValueType(MVT::i32));
15166 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
15167}
15168
15171 SelectionDAG &DAG) const {
15172 SDLoc DL(N);
15173 switch (N->getOpcode()) {
15174 default:
15175 llvm_unreachable("Don't know how to custom type legalize this operation!");
15178 case ISD::FP_TO_SINT:
15179 case ISD::FP_TO_UINT: {
15180 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15181 "Unexpected custom legalisation");
15182 bool IsStrict = N->isStrictFPOpcode();
15183 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
15184 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
15185 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
15186 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
15188 if (!isTypeLegal(Op0.getValueType()))
15189 return;
15190 if (IsStrict) {
15191 SDValue Chain = N->getOperand(0);
15192 // In absence of Zfh, promote f16 to f32, then convert.
15193 if (Op0.getValueType() == MVT::f16 &&
15194 !Subtarget.hasStdExtZfhOrZhinx()) {
15195 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
15196 {Chain, Op0});
15197 Chain = Op0.getValue(1);
15198 }
15199 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
15200 : RISCVISD::STRICT_FCVT_WU_RV64;
15201 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
15202 SDValue Res = DAG.getNode(
15203 Opc, DL, VTs, Chain, Op0,
15204 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
15205 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15206 Results.push_back(Res.getValue(1));
15207 return;
15208 }
15209 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
15210 // convert.
15211 if ((Op0.getValueType() == MVT::f16 &&
15212 !Subtarget.hasStdExtZfhOrZhinx()) ||
15213 Op0.getValueType() == MVT::bf16)
15214 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
15215
15216 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15217 SDValue Res =
15218 DAG.getNode(Opc, DL, MVT::i64, Op0,
15219 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
15220 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15221 return;
15222 }
15223 // If the FP type needs to be softened, emit a library call using the 'si'
15224 // version. If we left it to default legalization we'd end up with 'di'. If
15225 // the FP type doesn't need to be softened just let generic type
15226 // legalization promote the result type.
15227 RTLIB::Libcall LC;
15228 if (IsSigned)
15229 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
15230 else
15231 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
15232 MakeLibCallOptions CallOptions;
15233 EVT OpVT = Op0.getValueType();
15234 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
15235 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
15236 SDValue Result;
15237 std::tie(Result, Chain) =
15238 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
15239 Results.push_back(Result);
15240 if (IsStrict)
15241 Results.push_back(Chain);
15242 break;
15243 }
15244 case ISD::LROUND: {
15245 SDValue Op0 = N->getOperand(0);
15246 EVT Op0VT = Op0.getValueType();
15247 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
15249 if (!isTypeLegal(Op0VT))
15250 return;
15251
15252 // In absence of Zfh, promote f16 to f32, then convert.
15253 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
15254 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
15255
15256 SDValue Res =
15257 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
15258 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
15259 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15260 return;
15261 }
15262 // If the FP type needs to be softened, emit a library call to lround. We'll
15263 // need to truncate the result. We assume any value that doesn't fit in i32
15264 // is allowed to return an unspecified value.
15265 RTLIB::Libcall LC =
15266 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
15267 MakeLibCallOptions CallOptions;
15268 EVT OpVT = Op0.getValueType();
15269 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
15270 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
15271 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
15272 Results.push_back(Result);
15273 break;
15274 }
15277 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
15278 "has custom type legalization on riscv32");
15279
15280 SDValue LoCounter, HiCounter;
15281 MVT XLenVT = Subtarget.getXLenVT();
15282 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
15283 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
15284 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
15285 } else {
15286 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
15287 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
15288 }
15289 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
15290 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
15291 N->getOperand(0), LoCounter, HiCounter);
15292
15293 Results.push_back(
15294 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
15295 Results.push_back(RCW.getValue(2));
15296 break;
15297 }
15298 case ISD::LOAD: {
15299 if (!ISD::isNON_EXTLoad(N))
15300 return;
15301
15302 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
15303 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
15305
15306 if (N->getValueType(0) == MVT::i64) {
15307 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
15308 "Unexpected custom legalisation");
15309
15310 if (Ld->getAlign() < Subtarget.getZilsdAlign())
15311 return;
15312
15313 SDLoc DL(N);
15314 SDValue Result = DAG.getMemIntrinsicNode(
15315 RISCVISD::LD_RV32, DL,
15316 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
15317 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
15318 SDValue Lo = Result.getValue(0);
15319 SDValue Hi = Result.getValue(1);
15320 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15321 Results.append({Pair, Result.getValue(2)});
15322 return;
15323 }
15324
15325 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15326 "Unexpected custom legalisation");
15327
15328 SDLoc dl(N);
15329 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
15330 Ld->getBasePtr(), Ld->getMemoryVT(),
15331 Ld->getMemOperand());
15332 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
15333 Results.push_back(Res.getValue(1));
15334 return;
15335 }
15336 case ISD::MUL: {
15337 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
15338 unsigned XLen = Subtarget.getXLen();
15339 if (Size > XLen) {
15340 // This multiply needs to be expanded, try to use MULH+MUL or WMUL if
15341 // possible. We duplicate the default legalization to
15342 // MULHU/MULHS/UMUL_LOHI/SMUL_LOHI to minimize the number of calls to
15343 // MaskedValueIsZero and ComputeNumSignBits
15344 // FIXME: Should we have a target independent MULHSU/WMULSU node? Are
15345 // there are other targets that could use it?
15346 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
15347
15348 auto MakeMULPair = [&](SDValue L, SDValue R, unsigned HighOpc,
15349 unsigned LoHiOpc) {
15350 MVT XLenVT = Subtarget.getXLenVT();
15351 L = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, L);
15352 R = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, R);
15353 SDValue Lo, Hi;
15354 if (Subtarget.hasStdExtP() && !Subtarget.is64Bit()) {
15355 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
15356 Lo = DAG.getNode(LoHiOpc, DL, VTs, L, R);
15357 Hi = Lo.getValue(1);
15358 } else {
15359 Lo = DAG.getNode(ISD::MUL, DL, XLenVT, L, R);
15360 Hi = DAG.getNode(HighOpc, DL, XLenVT, L, R);
15361 }
15362 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
15363 };
15364
15365 SDValue LHS = N->getOperand(0);
15366 SDValue RHS = N->getOperand(1);
15367
15368 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
15369 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
15370 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
15371 if (LHSIsU && RHSIsU) {
15372 Results.push_back(MakeMULPair(LHS, RHS, ISD::MULHU, ISD::UMUL_LOHI));
15373 return;
15374 }
15375
15376 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
15377 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
15378 if (LHSIsS && RHSIsS)
15379 Results.push_back(MakeMULPair(LHS, RHS, ISD::MULHS, ISD::SMUL_LOHI));
15380 else if (RHSIsU && LHSIsS)
15381 Results.push_back(
15382 MakeMULPair(LHS, RHS, RISCVISD::MULHSU, RISCVISD::WMULSU));
15383 else if (LHSIsU && RHSIsS)
15384 Results.push_back(
15385 MakeMULPair(RHS, LHS, RISCVISD::MULHSU, RISCVISD::WMULSU));
15386
15387 return;
15388 }
15389 [[fallthrough]];
15390 }
15391 case ISD::ADD:
15392 case ISD::SUB:
15393 if (N->getValueType(0) == MVT::i64) {
15394 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
15395 "Unexpected custom legalisation");
15396
15397 // Expand to ADDD/SUBD.
15398 auto [LHSLo, LHSHi] =
15399 DAG.SplitScalar(N->getOperand(0), DL, MVT::i32, MVT::i32);
15400 auto [RHSLo, RHSHi] =
15401 DAG.SplitScalar(N->getOperand(1), DL, MVT::i32, MVT::i32);
15402 unsigned Opc =
15403 N->getOpcode() == ISD::ADD ? RISCVISD::ADDD : RISCVISD::SUBD;
15404 SDValue Res = DAG.getNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::i32),
15405 LHSLo, LHSHi, RHSLo, RHSHi);
15406 Res = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Res, Res.getValue(1));
15407 Results.push_back(Res);
15408 return;
15409 }
15410
15411 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15412 "Unexpected custom legalisation");
15413 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
15414 break;
15415 case ISD::SHL:
15416 case ISD::SRA:
15417 case ISD::SRL: {
15418 EVT VT = N->getValueType(0);
15419 if (VT.isFixedLengthVector() && Subtarget.hasStdExtP()) {
15420 assert(Subtarget.is64Bit() && (VT == MVT::v2i16 || VT == MVT::v4i8) &&
15421 "Unexpected vector type for P-extension shift");
15422
15423 // If shift amount is a splat, don't scalarize - let normal widening
15424 // and SIMD patterns handle it (pslli.h, psrli.h, etc.)
15425 SDValue ShiftAmt = N->getOperand(1);
15426 if (DAG.isSplatValue(ShiftAmt, /*AllowUndefs=*/true))
15427 break;
15428
15429 EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT);
15430 unsigned WidenNumElts = WidenVT.getVectorNumElements();
15431 // Unroll with OrigNumElts operations, padding result to WidenNumElts
15432 SDValue Res = DAG.UnrollVectorOp(N, WidenNumElts);
15433 Results.push_back(Res);
15434 break;
15435 }
15436
15437 if (VT == MVT::i64) {
15438 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
15439 "Unexpected custom legalisation");
15440
15441 SDValue LHS = N->getOperand(0);
15442 SDValue ShAmt = N->getOperand(1);
15443
15444 unsigned WideOpc = 0;
15445 APInt HighMask = APInt::getHighBitsSet(64, 32);
15446 if (DAG.MaskedValueIsZero(LHS, HighMask))
15447 WideOpc = RISCVISD::WSLL;
15448 else if (DAG.ComputeMaxSignificantBits(LHS) <= 32)
15449 WideOpc = RISCVISD::WSLA;
15450
15451 if (WideOpc) {
15452 SDValue Res =
15453 DAG.getNode(WideOpc, DL, DAG.getVTList(MVT::i32, MVT::i32),
15454 DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LHS),
15455 DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ShAmt));
15456 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0),
15457 Res, Res.getValue(1)));
15458 return;
15459 }
15460
15461 // Only handle constant shifts < 32. Non-constant shifts are handled by
15462 // lowerShiftLeftParts/lowerShiftRightParts, and shifts >= 32 use default
15463 // legalization.
15464 auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15465 if (!ShAmtC || ShAmtC->getZExtValue() >= 32)
15466 break;
15467
15468 auto [Lo, Hi] = DAG.SplitScalar(LHS, DL, MVT::i32, MVT::i32);
15469
15470 SDValue LoRes, HiRes;
15471 if (N->getOpcode() == ISD::SHL) {
15472 // Lo = slli Lo, shamt
15473 // Hi = nsrli {Hi, Lo}, (32 - shamt)
15474 uint64_t ShAmtVal = ShAmtC->getZExtValue();
15475 LoRes = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, ShAmt);
15476 HiRes = DAG.getNode(RISCVISD::NSRL, DL, MVT::i32, Lo, Hi,
15477 DAG.getConstant(32 - ShAmtVal, DL, MVT::i32));
15478 } else {
15479 bool IsSRA = N->getOpcode() == ISD::SRA;
15480 LoRes = DAG.getNode(IsSRA ? RISCVISD::NSRA : RISCVISD::NSRL, DL,
15481 MVT::i32, Lo, Hi, ShAmt);
15482 HiRes =
15483 DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32, Hi, ShAmt);
15484 }
15485 SDValue Res = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoRes, HiRes);
15486 Results.push_back(Res);
15487 return;
15488 }
15489
15490 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
15491 "Unexpected custom legalisation");
15492 if (N->getOperand(1).getOpcode() != ISD::Constant) {
15493 // If we can use a BSET instruction, allow default promotion to apply.
15494 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
15495 isOneConstant(N->getOperand(0)))
15496 break;
15497 Results.push_back(customLegalizeToWOp(N, DAG));
15498 break;
15499 }
15500
15501 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
15502 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
15503 // shift amount.
15504 if (N->getOpcode() == ISD::SHL) {
15505 SDLoc DL(N);
15506 SDValue NewOp0 =
15507 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15508 SDValue NewOp1 =
15509 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
15510 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
15511 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15512 DAG.getValueType(MVT::i32));
15513 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15514 }
15515
15516 break;
15517 }
15518 case ISD::ROTL:
15519 case ISD::ROTR:
15520 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15521 "Unexpected custom legalisation");
15522 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
15523 Subtarget.hasVendorXTHeadBb()) &&
15524 "Unexpected custom legalization");
15525 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
15526 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
15527 return;
15528 Results.push_back(customLegalizeToWOp(N, DAG));
15529 break;
15530 case ISD::CTTZ:
15532 case ISD::CTLZ:
15534 case ISD::CTLS: {
15535 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15536 "Unexpected custom legalisation");
15537
15538 SDValue NewOp0 =
15539 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15540 unsigned Opc;
15541 switch (N->getOpcode()) {
15542 default: llvm_unreachable("Unexpected opcode");
15543 case ISD::CTTZ:
15545 Opc = RISCVISD::CTZW;
15546 break;
15547 case ISD::CTLZ:
15549 Opc = RISCVISD::CLZW;
15550 break;
15551 case ISD::CTLS:
15552 Opc = RISCVISD::CLSW;
15553 break;
15554 }
15555
15556 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
15557 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15558 return;
15559 }
15560 case ISD::SDIV:
15561 case ISD::UDIV:
15562 case ISD::UREM: {
15563 MVT VT = N->getSimpleValueType(0);
15564 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
15565 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
15566 "Unexpected custom legalisation");
15567 // Don't promote division/remainder by constant since we should expand those
15568 // to multiply by magic constant.
15569 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
15570 if (N->getOperand(1).getOpcode() == ISD::Constant &&
15571 !isIntDivCheap(N->getValueType(0), Attr))
15572 return;
15573
15574 // If the input is i32, use ANY_EXTEND since the W instructions don't read
15575 // the upper 32 bits. For other types we need to sign or zero extend
15576 // based on the opcode.
15577 unsigned ExtOpc = ISD::ANY_EXTEND;
15578 if (VT != MVT::i32)
15579 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
15581
15582 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
15583 break;
15584 }
15585 case ISD::SADDO:
15586 case ISD::SSUBO: {
15587 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15588 "Unexpected custom legalisation");
15589
15590 // This is similar to the default legalization, but we return the
15591 // sext_inreg instead of the add/sub.
15592 bool IsAdd = N->getOpcode() == ISD::SADDO;
15593 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15594 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
15595 SDValue Op =
15596 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15597 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Op,
15598 DAG.getValueType(MVT::i32));
15599
15600 SDValue Overflow;
15601
15602 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
15603 // use the default legalization.
15604 if (IsAdd && isa<ConstantSDNode>(N->getOperand(1))) {
15605 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
15606
15607 // For an addition, the result should be less than one of the operands
15608 // (LHS) if and only if the other operand (RHS) is negative, otherwise
15609 // there will be overflow.
15610 EVT OType = N->getValueType(1);
15611 SDValue ResultLowerThanLHS =
15612 DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
15613 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
15614
15615 Overflow =
15616 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
15617 } else {
15618 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, Op, ISD::SETNE);
15619 }
15620
15621 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15622 Results.push_back(Overflow);
15623 return;
15624 }
15625 case ISD::UADDO:
15626 case ISD::USUBO: {
15627 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15628 "Unexpected custom legalisation");
15629 bool IsAdd = N->getOpcode() == ISD::UADDO;
15630 // Create an ADDW or SUBW.
15631 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15632 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15633 SDValue Res =
15634 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15635 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
15636 DAG.getValueType(MVT::i32));
15637
15638 SDValue Overflow;
15639 if (IsAdd && isOneConstant(RHS)) {
15640 // Special case uaddo X, 1 overflowed if the addition result is 0.
15641 // The general case (X + C) < C is not necessarily beneficial. Although we
15642 // reduce the live range of X, we may introduce the materialization of
15643 // constant C, especially when the setcc result is used by branch. We have
15644 // no compare with constant and branch instructions.
15645 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
15646 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
15647 } else if (IsAdd && isAllOnesConstant(RHS)) {
15648 // Special case uaddo X, -1 overflowed if X != 0.
15649 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
15650 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
15651 } else {
15652 // Sign extend the LHS and perform an unsigned compare with the ADDW
15653 // result. Since the inputs are sign extended from i32, this is equivalent
15654 // to comparing the lower 32 bits.
15655 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15656 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
15657 IsAdd ? ISD::SETULT : ISD::SETUGT);
15658 }
15659
15660 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15661 Results.push_back(Overflow);
15662 return;
15663 }
15664 case ISD::UADDSAT:
15665 case ISD::USUBSAT:
15666 case ISD::SADDSAT:
15667 case ISD::SSUBSAT: {
15668 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15669 "Unexpected custom legalisation");
15670
15671 if (Subtarget.hasStdExtP()) {
15672 // On RV64, map scalar i32 saturating add/sub through lane 0 of a packed
15673 // v2i32 operation so we can select ps*.w instructions.
15674 SDValue LHS = DAG.getNode(
15675 ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
15676 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)));
15677 SDValue RHS = DAG.getNode(
15678 ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
15679 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)));
15680 SDValue VecRes = DAG.getNode(N->getOpcode(), DL, MVT::v2i32, LHS, RHS);
15681 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
15682 Results.push_back(
15683 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, VecRes, Zero));
15684 return;
15685 }
15686
15687 assert(!Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
15688 Results.push_back(expandAddSubSat(N, DAG));
15689 return;
15690 }
15691 case ISD::ABS: {
15692 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15693 "Unexpected custom legalisation");
15694
15695 if (Subtarget.hasStdExtP()) {
15696 SDValue Src =
15697 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15698 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
15699 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15700 return;
15701 }
15702
15703 if (Subtarget.hasStdExtZbb()) {
15704 // Emit a special node that will be expanded to NEGW+MAX at isel.
15705 // This allows us to remember that the result is sign extended. Expanding
15706 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
15707 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
15708 N->getOperand(0));
15709 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
15710 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15711 return;
15712 }
15713
15714 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
15715 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15716
    // Freeze the source so we can increase its use count.
15718 Src = DAG.getFreeze(Src);
15719
15720 // Copy sign bit to all bits using the sraiw pattern.
15721 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
15722 DAG.getValueType(MVT::i32));
15723 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
15724 DAG.getConstant(31, DL, MVT::i64));
15725
15726 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
15727 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
15728
15729 // NOTE: The result is only required to be anyextended, but sext is
15730 // consistent with type legalization of sub.
15731 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
15732 DAG.getValueType(MVT::i32));
15733 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15734 return;
15735 }
15736 case ISD::BITCAST: {
15737 EVT VT = N->getValueType(0);
15738 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
15739 SDValue Op0 = N->getOperand(0);
15740 EVT Op0VT = Op0.getValueType();
15741 MVT XLenVT = Subtarget.getXLenVT();
15742 if (VT == MVT::i16 &&
15743 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
15744 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
15745 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
15746 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
15747 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
15748 Subtarget.hasStdExtFOrZfinx()) {
15749 SDValue FPConv =
15750 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
15751 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
15752 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
15753 Subtarget.hasStdExtDOrZdinx()) {
15754 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
15755 DAG.getVTList(MVT::i32, MVT::i32), Op0);
15756 SDValue Lo = NewReg.getValue(0);
15757 SDValue Hi = NewReg.getValue(1);
15758 // For big-endian, swap the order when building the i64 pair.
15759 if (!Subtarget.isLittleEndian())
15760 std::swap(Lo, Hi);
15761 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15762 Results.push_back(RetReg);
15763 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
15764 isTypeLegal(Op0VT)) {
15765 // Custom-legalize bitcasts from fixed-length vector types to illegal
15766 // scalar types in order to improve codegen. Bitcast the vector to a
15767 // one-element vector type whose element type is the same as the result
15768 // type, and extract the first element.
15769 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
15770 if (isTypeLegal(BVT)) {
15771 SDValue BVec = DAG.getBitcast(BVT, Op0);
15772 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
15773 }
15774 }
15775 break;
15776 }
15777 case ISD::BITREVERSE: {
15778 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
15779 "Unexpected custom legalisation");
15780 MVT XLenVT = Subtarget.getXLenVT();
15781 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15782 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
15783 // ReplaceNodeResults requires we maintain the same type for the return
15784 // value.
15785 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
15786 break;
15787 }
15788 case RISCVISD::BREV8:
15789 case RISCVISD::ORC_B: {
15790 MVT VT = N->getSimpleValueType(0);
15791 MVT XLenVT = Subtarget.getXLenVT();
15792 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
15793 "Unexpected custom legalisation");
15794 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
15795 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
15796 "Unexpected extension");
15797 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15798 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
15799 // ReplaceNodeResults requires we maintain the same type for the return
15800 // value.
15801 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
15802 break;
15803 }
15804 case RISCVISD::ASUB:
15805 case RISCVISD::ASUBU:
15806 case RISCVISD::MULHSU:
15807 case RISCVISD::MULHR:
15808 case RISCVISD::MULHRU:
15809 case RISCVISD::MULHRSU: {
15810 MVT VT = N->getSimpleValueType(0);
15811 SDValue Op0 = N->getOperand(0);
15812 SDValue Op1 = N->getOperand(1);
15813 unsigned Opcode = N->getOpcode();
15814 // PMULH* variants don't support i8
15815 [[maybe_unused]] bool IsMulH =
15816 Opcode == RISCVISD::MULHSU || Opcode == RISCVISD::MULHR ||
15817 Opcode == RISCVISD::MULHRU || Opcode == RISCVISD::MULHRSU;
15818 assert(VT == MVT::v2i16 || (!IsMulH && VT == MVT::v4i8));
15819 MVT NewVT = MVT::v4i16;
15820 if (VT == MVT::v4i8)
15821 NewVT = MVT::v8i8;
15822 SDValue Undef = DAG.getUNDEF(VT);
15823 Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
15824 Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
15825 Results.push_back(DAG.getNode(Opcode, DL, NewVT, {Op0, Op1}));
15826 return;
15827 }
15829 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
15830 // type is illegal (currently only vXi64 RV32).
15831 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
15832 // transferred to the destination register. We issue two of these from the
15833 // upper- and lower- halves of the SEW-bit vector element, slid down to the
15834 // first element.
15835 SDValue Vec = N->getOperand(0);
15836 SDValue Idx = N->getOperand(1);
15837
15838 // The vector type hasn't been legalized yet so we can't issue target
15839 // specific nodes if it needs legalization.
15840 // FIXME: We would manually legalize if it's important.
15841 if (!isTypeLegal(Vec.getValueType()))
15842 return;
15843
15844 MVT VecVT = Vec.getSimpleValueType();
15845
15846 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
15847 VecVT.getVectorElementType() == MVT::i64 &&
15848 "Unexpected EXTRACT_VECTOR_ELT legalization");
15849
15850 // If this is a fixed vector, we need to convert it to a scalable vector.
15851 MVT ContainerVT = VecVT;
15852 if (VecVT.isFixedLengthVector()) {
15853 ContainerVT = getContainerForFixedLengthVector(VecVT);
15854 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
15855 }
15856
15857 MVT XLenVT = Subtarget.getXLenVT();
15858
15859 // Use a VL of 1 to avoid processing more elements than we need.
15860 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
15861
15862 // Unless the index is known to be 0, we must slide the vector down to get
15863 // the desired element into index 0.
15864 if (!isNullConstant(Idx)) {
15865 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
15866 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
15867 }
15868
15869 // Extract the lower XLEN bits of the correct vector element.
15870 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15871
15872 // To extract the upper XLEN bits of the vector element, shift the first
15873 // element right by 32 bits and re-extract the lower XLEN bits.
15874 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
15875 DAG.getUNDEF(ContainerVT),
15876 DAG.getConstant(32, DL, XLenVT), VL);
15877 SDValue LShr32 =
15878 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
15879 DAG.getUNDEF(ContainerVT), Mask, VL);
15880
15881 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15882
15883 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15884 break;
15885 }
15887 unsigned IntNo = N->getConstantOperandVal(0);
15888 switch (IntNo) {
15889 default:
15891 "Don't know how to custom type legalize this intrinsic!");
15892 case Intrinsic::experimental_get_vector_length: {
15893 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
15894 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15895 return;
15896 }
15897 case Intrinsic::experimental_cttz_elts: {
15898 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
15899 Results.push_back(DAG.getZExtOrTrunc(Res, DL, N->getValueType(0)));
15900 return;
15901 }
15902 case Intrinsic::riscv_orc_b:
15903 case Intrinsic::riscv_brev8:
15904 case Intrinsic::riscv_sha256sig0:
15905 case Intrinsic::riscv_sha256sig1:
15906 case Intrinsic::riscv_sha256sum0:
15907 case Intrinsic::riscv_sha256sum1:
15908 case Intrinsic::riscv_sm3p0:
15909 case Intrinsic::riscv_sm3p1: {
15910 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15911 return;
15912 unsigned Opc;
15913 switch (IntNo) {
15914 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15915 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15916 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15917 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15918 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15919 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15920 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15921 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15922 }
15923
15924 SDValue NewOp =
15925 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15926 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15927 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15928 return;
15929 }
15930 case Intrinsic::riscv_sm4ks:
15931 case Intrinsic::riscv_sm4ed: {
15932 unsigned Opc =
15933 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15934 SDValue NewOp0 =
15935 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15936 SDValue NewOp1 =
15937 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15938 SDValue Res =
15939 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15940 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15941 return;
15942 }
15943 case Intrinsic::riscv_mopr: {
15944 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15945 return;
15946 SDValue NewOp =
15947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15948 SDValue Res = DAG.getNode(
15949 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15950 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15951 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15952 return;
15953 }
15954 case Intrinsic::riscv_moprr: {
15955 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15956 return;
15957 SDValue NewOp0 =
15958 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15959 SDValue NewOp1 =
15960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15961 SDValue Res = DAG.getNode(
15962 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15963 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15965 return;
15966 }
15967 case Intrinsic::riscv_clmulh:
15968 case Intrinsic::riscv_clmulr: {
15969 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15970 return;
15971
15972 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15973 // to the full 128-bit clmul result of multiplying two xlen values.
15974 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15975 // upper 32 bits.
15976 //
15977 // The alternative is to mask the inputs to 32 bits and use clmul, but
15978 // that requires two shifts to mask each input without zext.w.
15979 // FIXME: If the inputs are known zero extended or could be freely
15980 // zero extended, the mask form would be better.
15981 SDValue NewOp0 =
15982 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15983 SDValue NewOp1 =
15984 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15985 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15986 DAG.getConstant(32, DL, MVT::i64));
15987 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15988 DAG.getConstant(32, DL, MVT::i64));
15989 unsigned Opc =
15990 IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
15991 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15992 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15993 DAG.getConstant(32, DL, MVT::i64));
15994 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15995 return;
15996 }
15997 case Intrinsic::riscv_vmv_x_s: {
15998 EVT VT = N->getValueType(0);
15999 MVT XLenVT = Subtarget.getXLenVT();
16000 if (VT.bitsLT(XLenVT)) {
16001 // Simple case just extract using vmv.x.s and truncate.
16002 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
16003 Subtarget.getXLenVT(), N->getOperand(1));
16004 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
16005 return;
16006 }
16007
16008 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
16009 "Unexpected custom legalization");
16010
16011 // We need to do the move in two steps.
16012 SDValue Vec = N->getOperand(1);
16013 MVT VecVT = Vec.getSimpleValueType();
16014
16015 // First extract the lower XLEN bits of the element.
16016 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
16017
16018 // To extract the upper XLEN bits of the vector element, shift the first
16019 // element right by 32 bits and re-extract the lower XLEN bits.
16020 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
16021
16022 SDValue ThirtyTwoV =
16023 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
16024 DAG.getConstant(32, DL, XLenVT), VL);
16025 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
16026 DAG.getUNDEF(VecVT), Mask, VL);
16027 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
16028
16029 Results.push_back(
16030 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
16031 break;
16032 }
16033 }
16034 break;
16035 }
16036 case ISD::VECREDUCE_ADD:
16037 case ISD::VECREDUCE_AND:
16038 case ISD::VECREDUCE_OR:
16039 case ISD::VECREDUCE_XOR:
16044 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
16045 Results.push_back(V);
16046 break;
16047 case ISD::VP_REDUCE_ADD:
16048 case ISD::VP_REDUCE_AND:
16049 case ISD::VP_REDUCE_OR:
16050 case ISD::VP_REDUCE_XOR:
16051 case ISD::VP_REDUCE_SMAX:
16052 case ISD::VP_REDUCE_UMAX:
16053 case ISD::VP_REDUCE_SMIN:
16054 case ISD::VP_REDUCE_UMIN:
16055 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
16056 Results.push_back(V);
16057 break;
16058 case ISD::GET_ROUNDING: {
16059 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
16060 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
16061 Results.push_back(Res.getValue(0));
16062 Results.push_back(Res.getValue(1));
16063 break;
16064 }
16065 }
16066}
16067
16068/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
16069/// which corresponds to it.
16070static unsigned getVecReduceOpcode(unsigned Opc) {
16071 switch (Opc) {
16072 default:
16073 llvm_unreachable("Unhandled binary to transform reduction");
16074 case ISD::ADD:
16075 return ISD::VECREDUCE_ADD;
16076 case ISD::UMAX:
16077 return ISD::VECREDUCE_UMAX;
16078 case ISD::SMAX:
16079 return ISD::VECREDUCE_SMAX;
16080 case ISD::UMIN:
16081 return ISD::VECREDUCE_UMIN;
16082 case ISD::SMIN:
16083 return ISD::VECREDUCE_SMIN;
16084 case ISD::AND:
16085 return ISD::VECREDUCE_AND;
16086 case ISD::OR:
16087 return ISD::VECREDUCE_OR;
16088 case ISD::XOR:
16089 return ISD::VECREDUCE_XOR;
16090 case ISD::FADD:
16091 // Note: This is the associative form of the generic reduction opcode.
16092 return ISD::VECREDUCE_FADD;
16093 case ISD::FMAXNUM:
16094 return ISD::VECREDUCE_FMAX;
16095 case ISD::FMINNUM:
16096 return ISD::VECREDUCE_FMIN;
16097 }
16098}
16099
/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
static SDValue
// NOTE(review): the line naming this function and its leading parameters
// (presumably `combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                  const RISCVSubtarget &Subtarget) {

  // This transform needs to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  // NOTE(review): the guard condition for the following early return is
  // missing from this excerpt -- confirm against upstream.
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  const SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // Integer binops are handled unconditionally; FP binops need extra checks.
  if (!VT.isInteger()) {
    switch (Opc) {
    default:
      return SDValue();
    case ISD::FADD:
      // For FADD, we only handle the case with reassociation allowed. We
      // could handle strict reduction order, but at the moment, there's no
      // known reason to, and the complexity isn't worth it.
      if (!N->getFlags().hasAllowReassociation())
        return SDValue();
      break;
    case ISD::FMAXNUM:
    case ISD::FMINNUM:
      break;
    }
  }

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Both operands must be single use, otherwise forming the reduction would
  // duplicate work.
  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  // Canonicalize any extract_vector_elt operand to the RHS.
  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  // Elements wider than ELEN can't be processed by the vector unit.
  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  // reduce_op (extract_subvector [2 x VT] from V). This will form the
  // root of our reduction tree. TODO: We could extend this to any two
  // adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //              (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type. If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
    // Intersect the flags of the old reduction and the binop so the new
    // node only keeps what both allowed.
    return DAG.getNode(ReduceOpc, DL, VT, Vec,
                       ReduceVec->getFlags() & N->getFlags());
  }

  return SDValue();
}
16206
16207
// Try to fold (<bop> x, (reduction.<bop> vec, start))
// into (reduction.<bop> vec, (<bop> x, start)), i.e. fold the scalar binop
// into the reduction's start value when the current start is the neutral
// element.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                const RISCVSubtarget &Subtarget) {
  // Map a scalar binop opcode to the VL-predicated RVV reduction node that
  // could have produced one of its operands.
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  // An operand participates if it extracts element 0 of the matching RVV
  // reduction node.
  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip FADD when reassociation is disallowed, since this combine needs to
  // reassociate the operation into the reduction's start value.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  // Both the extract and the reduction must be single use so we can replace
  // them without duplicating work.
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  // Operand 2 of the VL reduction is the start-value vector; look through an
  // INSERT_SUBVECTOR into undef at index 0 (restored below).
  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check the scalar of ScalarV is neutral element
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  // The other binop operand becomes the reduction's new start value.
  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);

  // Rebuild the reduction with the new start vector in operand 2; all other
  // operands (passthru, vec, mask, policy, AVL) are unchanged.
  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}
16311
// Optimize (add (shl x, c0), (shl y, c1)) ->
// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
// or
// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue transformAddShlImm(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
  // extension.
  if (!Subtarget.hasShlAdd(3))
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  // Both shift amounts must be strictly positive.
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // The difference of the shift amounts selects which shNadd (or wider
  // shladd) form the target must support.
  int64_t Diff = std::abs(C0 - C1);
  if (!Subtarget.hasShlAdd(Diff))
    return SDValue();

  // Build nodes.
  SDLoc DL(N);
  // NS is the value shifted by the smaller amount, NL by the larger; the
  // result is (shl (shl_add NL, Diff, NS), min(C0, C1)).
  int64_t Bits = std::min(C0, C1);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
                              DAG.getTargetConstant(Diff, DL, VT), NS);
  return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
}
16358
// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
// or 3. If so, rewrite (add (add (shl X, c), C1), Other) as
// (add (shl_add X, c, Other), C1) so a SH*ADD instruction can be selected.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI,
// SDValue Other,`) is missing from this excerpt -- confirm against upstream.
                                     SelectionDAG &DAG) {
  using namespace llvm::SDPatternMatch;

  // Looking for a reg-reg add and not an addi.
  if (isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // Based on testing it seems that performance degrades if the ADDI has
  // more than 2 uses.
  if (AddI->use_size() > 2)
    return SDValue();

  // Match AddI = (add (shl X, VShift), AddVal), requiring the shift to be
  // single use.
  APInt AddVal;
  SDValue SHLVal;
  if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
    return SDValue();

  APInt VShift;
  if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
    return SDValue();

  // Only shift amounts 1..3 have a SH1ADD/SH2ADD/SH3ADD form.
  if (VShift.slt(1) || VShift.sgt(3))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // The shift must be positive but the add can be signed.
  uint64_t ShlConst = VShift.getZExtValue();
  int64_t AddConst = AddVal.getSExtValue();

  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
                              DAG.getTargetConstant(ShlConst, DL, VT), Other);
  return DAG.getNode(ISD::ADD, DL, VT, SHADD,
                     DAG.getSignedConstant(AddConst, DL, VT));
}
16397
// Optimize (add (add (shl x, c0), c1), y) ->
// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                 const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT != Subtarget.getXLenVT())
    return SDValue();

  // ADD is commutative, so try the shifted-add pattern in either operand
  // position.
  SDValue AddI = N->getOperand(0);
  SDValue Other = N->getOperand(1);
  if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
    return V;
  if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
    return V;
  return SDValue();
}
16419
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineSelectAndUse(SDNode *N, SDValue Slct,
// SDValue OtherOp,`) is missing from this excerpt -- confirm against upstream.
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when condition code has multiple use.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  // Only fold a single-use generic SELECT or RISC-V SELECT_CC.
  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // True when N is the identity constant for this combine: -1 when
  // AllOnes, 0 otherwise.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
  // NOTE(review): the lambda body is missing from this excerpt (presumably
  // `return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);`) -- confirm
  // against upstream.
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before the true/false values.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
16493
16494 // Attempt combineSelectAndUse on each operand of a commutative operator N.
16496 bool AllOnes,
16497 const RISCVSubtarget &Subtarget) {
16498 SDValue N0 = N->getOperand(0);
16499 SDValue N1 = N->getOperand(1);
// Since N is commutative, the select may appear as either operand; try the
// (select, other) pairing in both orders and take the first fold that works.
16500 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
16501 return Result;
16502 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
16503 return Result;
16504 return SDValue();
16505}
16506
16507 // Transform (add (mul x, c0), c1) ->
16508 // (add (mul (add x, c1/c0), c0), c1%c0).
16509 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
16510 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
16511 // to an infinite loop in DAGCombine if transformed.
16512 // Or transform (add (mul x, c0), c1) ->
16513 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
16514 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
16515 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
16516 // lead to an infinite loop in DAGCombine if transformed.
16517 // Or transform (add (mul x, c0), c1) ->
16518 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
16519 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
16520 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
16521 // lead to an infinite loop in DAGCombine if transformed.
16522 // Or transform (add (mul x, c0), c1) ->
16523 // (mul (add x, c1/c0), c0).
16524 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
16526 const RISCVSubtarget &Subtarget) {
16527 // Skip for vector types and larger types.
16528 EVT VT = N->getValueType(0);
16529 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
16530 return SDValue();
16531 // The first operand node must be a MUL and has no other use.
16532 SDValue N0 = N->getOperand(0);
16533 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
16534 return SDValue();
16535 // Check if c0 and c1 match above conditions.
16536 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16537 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16538 if (!N0C || !N1C)
16539 return SDValue();
16540 // If N0C has multiple uses it's possible one of the cases in
16541 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
16542 // in an infinite loop.
16543 if (!N0C->hasOneUse())
16544 return SDValue();
16545 int64_t C0 = N0C->getSExtValue();
16546 int64_t C1 = N1C->getSExtValue();
16547 int64_t CA, CB;
// Bail on trivial multipliers (those get folded elsewhere) and on a c1 that
// already fits an ADDI immediate, where this transform gains nothing.
16548 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
16549 return SDValue();
16550 // Search for proper CA (non-zero) and CB that both are simm12.
// In all three branches the invariant CA*C0 + CB == C1 holds (C++ signed
// division truncates toward zero), so the rewrite is value-preserving.
16551 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
16552 !isInt<12>(C0 * (C1 / C0))) {
16553 CA = C1 / C0;
16554 CB = C1 % C0;
16555 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
16556 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
16557 CA = C1 / C0 + 1;
16558 CB = C1 % C0 - C0;
16559 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
16560 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
16561 CA = C1 / C0 - 1;
16562 CB = C1 % C0 + C0;
16563 } else
16564 return SDValue();
16565 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
16566 SDLoc DL(N);
16567 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
16568 DAG.getSignedConstant(CA, DL, VT));
16569 SDValue New1 =
16570 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
16571 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
16572}
16573
16574 // add (zext, zext) -> zext (add (zext, zext))
16575 // sub (zext, zext) -> sext (sub (zext, zext))
16576 // mul (zext, zext) -> zext (mul (zext, zext))
16577 // sdiv (zext, zext) -> zext (sdiv (zext, zext))
16578 // udiv (zext, zext) -> zext (udiv (zext, zext))
16579 // srem (zext, zext) -> zext (srem (zext, zext))
16580 // urem (zext, zext) -> zext (urem (zext, zext))
16581 //
16582 // where the sum of the extend widths match, and the range of the bin op
16583 // fits inside the width of the narrower bin op. (For profitability on rvv, we
16584 // use a power of two for both inner and outer extend.)
16586
16587 EVT VT = N->getValueType(0);
16588 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
16589 return SDValue();
16590
16591 SDValue N0 = N->getOperand(0);
16592 SDValue N1 = N->getOperand(1);
16594 return SDValue();
16595 if (!N0.hasOneUse() || !N1.hasOneUse())
16596 return SDValue();
16597
16598 SDValue Src0 = N0.getOperand(0);
16599 SDValue Src1 = N1.getOperand(0);
16600 EVT SrcVT = Src0.getValueType();
// Both sources must share a legal type that is strictly narrower than half
// the result width, so the half-width intermediate op below cannot overflow.
16601 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
16602 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
16603 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
16604 return SDValue();
16605
16606 LLVMContext &C = *DAG.getContext();
16608 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
16609
// Re-extend the sources only to the half-width type; the final extend to VT
// is emitted below.
16610 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
16611 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
16612
16613 // Src0 and Src1 are zero extended, so they're always positive if signed.
16614 //
16615 // sub can produce a negative from two positive operands, so it needs sign
16616 // extended. Other nodes produce a positive from two positive operands, so
16617 // zero extend instead.
16618 unsigned OuterExtend =
16619 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
16620
16621 return DAG.getNode(
16622 OuterExtend, SDLoc(N), VT,
16623 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
16624}
16625
16626 // Try to turn (add (xor bool, 1) -1) into (neg bool).
// For bool in {0, 1}: (xor bool, 1) == 1 - bool, so adding -1 yields -bool.
16628 SDValue N0 = N->getOperand(0);
16629 SDValue N1 = N->getOperand(1);
16630 EVT VT = N->getValueType(0);
16631 SDLoc DL(N);
16632
16633 // RHS should be -1.
16634 if (!isAllOnesConstant(N1))
16635 return SDValue();
16636
16637 // Look for (xor X, 1).
16638 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
16639 return SDValue();
16640
16641 // First xor input should be 0 or 1.
16643 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
16644 return SDValue();
16645
16646 // Emit a negate of the setcc.
16647 return DAG.getNegative(N0.getOperand(0), DL, VT);
16648}
16649
// Top-level DAG combine for ISD::ADD: tries each specialized add fold in
// turn and returns the first one that succeeds.
16652 const RISCVSubtarget &Subtarget) {
16653 SelectionDAG &DAG = DCI.DAG;
16654 if (SDValue V = combineAddOfBooleanXor(N, DAG))
16655 return V;
16656 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
16657 return V;
// These shift-based reassociations are deferred until after legalization and
// are skipped when invoked from the legalizer itself.
16658 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
16659 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
16660 return V;
16661 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
16662 return V;
16663 }
16664 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16665 return V;
16666 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16667 return V;
16668 if (SDValue V = combineBinOpOfZExt(N, DAG))
16669 return V;
16670
16671 // fold (add (select lhs, rhs, cc, 0, y), x) ->
16672 // (select lhs, rhs, cc, x, (add x, y))
16673 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16674}
16675
16676 // Try to turn a sub boolean RHS and constant LHS into an addi.
16678 SDValue N0 = N->getOperand(0);
16679 SDValue N1 = N->getOperand(1);
16680 EVT VT = N->getValueType(0);
16681 SDLoc DL(N);
16682
16683 // Require a constant LHS.
16684 auto *N0C = dyn_cast<ConstantSDNode>(N0);
16685 if (!N0C)
16686 return SDValue();
16687
16688 // All our optimizations involve subtracting 1 from the immediate and forming
16689 // an ADDI. Make sure the new immediate is valid for an ADDI.
16690 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
16691 if (!ImmValMinus1.isSignedIntN(12))
16692 return SDValue();
16693
16694 SDValue NewLHS;
16695 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
16696 // (sub constant, (setcc x, y, eq/neq)) ->
16697 // (add (setcc x, y, neq/eq), constant - 1)
// Valid because a setcc produces 0 or 1: C - s == (1 - s) + (C - 1), and
// inverting an equality setcc computes exactly 1 - s.
16698 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16699 EVT SetCCOpVT = N1.getOperand(0).getValueType();
16700 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
16701 return SDValue();
16702 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16703 NewLHS =
16704 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
16705 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
16706 N1.getOperand(0).getOpcode() == ISD::SETCC) {
16707 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
16708 // Since setcc returns a bool the xor is equivalent to 1-setcc.
16709 NewLHS = N1.getOperand(0);
16710 } else
16711 return SDValue();
16712
16713 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
16714 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
16715}
16716
16717 // Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
16718 // potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
16719 // is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
16720 // valid with Y=3, while 0b0000_1000_0000_0100 is not.
16722 if (!Subtarget.hasStdExtZbb())
16723 return SDValue();
16724
16725 EVT VT = N->getValueType(0);
16726
16727 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
16728 return SDValue();
16729
16730 SDValue N0 = N->getOperand(0);
16731 SDValue N1 = N->getOperand(1);
16732
16733 if (N0->getOpcode() != ISD::SHL)
16734 return SDValue();
16735
16736 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
16737 if (!ShAmtCLeft)
16738 return SDValue();
// ShiftedAmount is the Y from the comment above: left shift of 8-Y implies a
// matching right shift of Y.
16740 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
16741
16742 if (ShiftedAmount >= 8)
16743 return SDValue();
16744
16745 SDValue LeftShiftOperand = N0->getOperand(0);
16746 SDValue RightShiftOperand = N1;
16747
16748 if (ShiftedAmount != 0) { // Right operand must be a right shift.
16749 if (N1->getOpcode() != ISD::SRL)
16750 return SDValue();
16751 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
16752 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
16753 return SDValue();
16754 RightShiftOperand = N1.getOperand(0);
16755 }
16756
16757 // At least one shift should have a single use.
16758 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
16759 return SDValue();
16760
16761 if (LeftShiftOperand != RightShiftOperand)
16762 return SDValue();
16763
// Mask has exactly the Y-th bit of every byte set; X must be zero everywhere
// else for the orc.b replacement to be equivalent.
16764 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
16765 Mask <<= ShiftedAmount;
16766 // Check that X has indeed the right shape (only the Y-th bit can be set in
16767 // every byte).
16768 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
16769 return SDValue();
16770
16771 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
16772}
16773
16775 const RISCVSubtarget &Subtarget) {
16776 if (SDValue V = combineSubOfBoolean(N, DAG))
16777 return V;
16778
16779 EVT VT = N->getValueType(0);
16780 SDValue N0 = N->getOperand(0);
16781 SDValue N1 = N->getOperand(1);
16782 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
16783 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
16784 isNullConstant(N1.getOperand(1)) &&
16785 N1.getValueType() == N1.getOperand(0).getValueType()) {
16786 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16787 if (CCVal == ISD::SETLT) {
16788 SDLoc DL(N);
16789 unsigned ShAmt = N0.getValueSizeInBits() - 1;
16790 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
16791 DAG.getConstant(ShAmt, DL, VT));
16792 }
16793 }
16794
16795 if (SDValue V = combineBinOpOfZExt(N, DAG))
16796 return V;
16797 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
16798 return V;
16799
16800 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
16801 // (select lhs, rhs, cc, x, (sub x, y))
16802 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
16803}
16804
16805 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
16806 // Legalizing setcc can introduce xors like this. Doing this transform reduces
16807 // the number of xors and may allow the xor to fold into a branch condition.
16809 SDValue N0 = N->getOperand(0);
16810 SDValue N1 = N->getOperand(1);
16811 bool IsAnd = N->getOpcode() == ISD::AND;
16812
16813 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
16814 return SDValue();
16815
16816 if (!N0.hasOneUse() || !N1.hasOneUse())
16817 return SDValue();
16818
16819 SDValue N01 = N0.getOperand(1);
16820 SDValue N11 = N1.getOperand(1);
16821
16822 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
16823 // (xor X, -1) based on the upper bits of the other operand being 0. If the
16824 // operation is And, allow one of the Xors to use -1.
16825 if (isOneConstant(N01)) {
16826 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
16827 return SDValue();
16828 } else if (isOneConstant(N11)) {
16829 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
16830 if (!(IsAnd && isAllOnesConstant(N01)))
16831 return SDValue();
16832 } else
16833 return SDValue();
16834
16835 EVT VT = N->getValueType(0);
16836
16837 SDValue N00 = N0.getOperand(0);
16838 SDValue N10 = N1.getOperand(0);
16839
16840 // The LHS of the xors needs to be 0/1.
16842 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
16843 return SDValue();
16844
// De Morgan: (not A) op (not B) == not (A op' B), with the final "not"
// expressed as xor with 1 since both inputs are boolean-valued.
16845 // Invert the opcode and insert a new xor.
16846 SDLoc DL(N);
16847 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16848 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
16849 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
16850}
16851
16852 // Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
16853 // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
16854 // value to an unsigned value. This will be lowered to vmax and series of
16855 // vnclipu instructions later. This can be extended to other truncated types
16856 // other than i8 by replacing 256 and 255 with the equivalent constants for the
16857 // type.
16859 EVT VT = N->getValueType(0);
16860 SDValue N0 = N->getOperand(0);
16861 EVT SrcVT = N0.getValueType();
16862
16863 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16864 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
16865 return SDValue();
16866
16867 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
16868 return SDValue();
16869
16870 SDValue Cond = N0.getOperand(0);
16871 SDValue True = N0.getOperand(1);
16872 SDValue False = N0.getOperand(2);
16873
16874 if (Cond.getOpcode() != ISD::SETCC)
16875 return SDValue();
16876
16877 // FIXME: Support the version of this pattern with the select operands
16878 // swapped.
16879 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16880 if (CCVal != ISD::SETULT)
16881 return SDValue();
16882
16883 SDValue CondLHS = Cond.getOperand(0);
16884 SDValue CondRHS = Cond.getOperand(1);
16885
16886 if (CondLHS != True)
16887 return SDValue();
16888
16889 unsigned ScalarBits = VT.getScalarSizeInBits();
16890
// The compare bound must be exactly 2^ScalarBits (e.g. 256 when truncating
// to i8), matching the saturation limit of the destination type.
16891 // FIXME: Support other constants.
16892 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16893 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16894 return SDValue();
16895
16896 if (False.getOpcode() != ISD::SIGN_EXTEND)
16897 return SDValue();
16898
16899 False = False.getOperand(0);
16900
16901 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16902 return SDValue();
16903
16904 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16905 if (!FalseRHSC || !FalseRHSC->isZero())
16906 return SDValue();
16907
16908 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16909 if (CCVal2 != ISD::SETGT)
16910 return SDValue();
16911
16912 // Emit the signed to unsigned saturation pattern.
16913 SDLoc DL(N);
16914 SDValue Max =
16915 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16916 SDValue Min =
16917 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16918 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16919 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16920}
16921
16922 // Handle P extension truncate patterns:
16923 // ASUB/ASUBU: (trunc (srl (sub ([s|z]ext a), ([s|z]ext b)), 1))
16924 // MULHSU: (trunc (srl (mul (sext a), (zext b)), EltBits))
16925 // MULHR*: (trunc (srl (add (mul (sext a), (zext b)), round_const), EltBits))
16927 SDValue N0 = N->getOperand(0);
16928 EVT VT = N->getValueType(0);
16929 if (N0.getOpcode() != ISD::SRL)
16930 return SDValue();
16931
// Only the fixed vector types handled by the P extension lowering.
16933 MVT VecVT = VT.getSimpleVT();
16934 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
16935 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
16936 return SDValue();
16937
16938 // Check if shift amount is a splat constant
16939 SDValue ShAmt = N0.getOperand(1);
16940 if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
16941 return SDValue();
16942
16944 if (!BV)
16945 return SDValue();
16946 SDValue Splat = BV->getSplatValue();
16947 if (!Splat)
16948 return SDValue();
16950 if (!C)
16951 return SDValue();
16952
16953 SDValue Op = N0.getOperand(0);
16954 unsigned ShAmtVal = C->getZExtValue();
16955 unsigned EltBits = VecVT.getScalarSizeInBits();
16956
16957 // Check for rounding pattern: (add (mul ...), round_const)
// round_const must be the splat of 2^(EltBits-1), i.e. 0.5 ULP of the final
// truncated result; if matched, peel the add and remember to emit MULHR*.
16958 bool IsRounding = false;
16959 if (Op.getOpcode() == ISD::ADD && (EltBits == 16 || EltBits == 32)) {
16960 SDValue AddRHS = Op.getOperand(1);
16961 if (auto *RndBV = dyn_cast<BuildVectorSDNode>(AddRHS.getNode())) {
16962 if (auto *RndC =
16963 dyn_cast_or_null<ConstantSDNode>(RndBV->getSplatValue())) {
16964 uint64_t ExpectedRnd = 1ULL << (EltBits - 1);
16965 if (RndC->getZExtValue() == ExpectedRnd &&
16966 Op.getOperand(0).getOpcode() == ISD::MUL) {
16967 Op = Op.getOperand(0);
16968 IsRounding = true;
16969 }
16970 }
16971 }
16972 }
16973
16974 SDValue LHS = Op.getOperand(0);
16975 SDValue RHS = Op.getOperand(1);
16976
16977 bool LHSIsSExt = LHS.getOpcode() == ISD::SIGN_EXTEND;
16978 bool LHSIsZExt = LHS.getOpcode() == ISD::ZERO_EXTEND;
16979 bool RHSIsSExt = RHS.getOpcode() == ISD::SIGN_EXTEND;
16980 bool RHSIsZExt = RHS.getOpcode() == ISD::ZERO_EXTEND;
16981
16982 if (!(LHSIsSExt || LHSIsZExt) || !(RHSIsSExt || RHSIsZExt))
16983 return SDValue();
16984
16985 SDValue A = LHS.getOperand(0);
16986 SDValue B = RHS.getOperand(0);
16987
// The pre-extension sources must already be of the truncated result type.
16988 if (A.getValueType() != VT || B.getValueType() != VT)
16989 return SDValue();
16990
16991 unsigned Opc;
16992 switch (Op.getOpcode()) {
16993 default:
16994 return SDValue();
16995 case ISD::SUB:
16996 // PASUB/PASUBU: shift amount must be 1
16997 if (ShAmtVal != 1)
16998 return SDValue();
16999 if (LHSIsSExt && RHSIsSExt)
17000 Opc = RISCVISD::ASUB;
17001 else if (LHSIsZExt && RHSIsZExt)
17002 Opc = RISCVISD::ASUBU;
17003 else
17004 return SDValue();
17005 break;
17006 case ISD::MUL:
17007 // MULH*/MULHR*: shift amount must be element size, only for i16/i32
17008 if (ShAmtVal != EltBits || (EltBits != 16 && EltBits != 32))
17009 return SDValue();
17010 if (IsRounding) {
17011 if (LHSIsSExt && RHSIsSExt) {
17012 Opc = RISCVISD::MULHR;
17013 } else if (LHSIsZExt && RHSIsZExt) {
17014 Opc = RISCVISD::MULHRU;
17015 } else if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
17016 Opc = RISCVISD::MULHRSU;
// MULHRSU expects the sign-extended operand first; swap for the
17017 // commuted case
17018 if (LHSIsZExt && RHSIsSExt)
17019 std::swap(A, B);
17020 } else {
17021 return SDValue();
17022 }
17023 } else {
17024 if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
17025 Opc = RISCVISD::MULHSU;
17026 // commuted case
17027 if (LHSIsZExt && RHSIsSExt)
17028 std::swap(A, B);
17029 } else
17030 return SDValue();
17031 }
17032 break;
17033 }
17034
17035 return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
17036}
17037
// Top-level DAG combine for ISD::TRUNCATE.
17039 const RISCVSubtarget &Subtarget) {
17040 SDValue N0 = N->getOperand(0);
17041 EVT VT = N->getValueType(0);
17042
17043 if (VT.isFixedLengthVector() && Subtarget.hasStdExtP())
17044 return combinePExtTruncate(N, DAG, Subtarget);
17045
17046 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
17047 // extending X. This is safe since we only need the LSB after the shift and
17048 // shift amounts larger than 31 would produce poison. If we wait until
17049 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
17050 // to use a BEXT instruction.
17051 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
17052 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
17053 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
17054 SDLoc DL(N0);
17055 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17056 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17057 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
17058 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
17059 }
17060
17061 return combineTruncSelectToSMaxUSat(N, DAG);
17062}
17063
17064 // InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
17065 // truncation. But RVV doesn't have truncation instructions for more than twice
17066 // the bitwidth.
17067 //
17068 // E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
17069 //
17070 // vsetvli a0, zero, e32, m2, ta, ma
17071 // vnsrl.wi v12, v8, 0
17072 // vsetvli zero, zero, e16, m1, ta, ma
17073 // vnsrl.wi v8, v12, 0
17074 // vsetvli zero, zero, e8, mf2, ta, ma
17075 // vnsrl.wi v8, v8, 0
17076 //
17077 // So reverse the combine so we generate an vmseq/vmsne again:
17078 //
17079 // and (lshr (trunc X), ShAmt), 1
17080 // -->
17081 // zext (icmp ne (and X, (1 << ShAmt)), 0)
17082 //
17083 // and (lshr (not (trunc X)), ShAmt), 1
17084 // -->
17085 // zext (icmp eq (and X, (1 << ShAmt)), 0)
17087 const RISCVSubtarget &Subtarget) {
17088 using namespace SDPatternMatch;
17089 SDLoc DL(N);
17090
17091 if (!Subtarget.hasVInstructions())
17092 return SDValue();
17093
17094 EVT VT = N->getValueType(0);
17095 if (!VT.isVector())
17096 return SDValue();
17097
17098 APInt ShAmt;
17099 SDValue Inner;
17100 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
17101 m_One())))
17102 return SDValue();
17103
17104 SDValue X;
17105 bool IsNot;
17106 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
17107 IsNot = true;
17108 else if (sd_match(Inner, m_Trunc(m_Value(X))))
17109 IsNot = false;
17110 else
17111 return SDValue();
17112
// Only reverse the combine when the truncation narrows by more than a factor
// of two -- the single-step truncates are already cheap (see comment above).
17113 EVT WideVT = X.getValueType();
17114 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
17115 return SDValue();
17116
17117 SDValue Res =
17118 DAG.getNode(ISD::AND, DL, WideVT, X,
17119 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
17120 Res = DAG.getSetCC(DL,
17121 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17122 WideVT.getVectorElementCount()),
17123 Res, DAG.getConstant(0, DL, WideVT),
17124 IsNot ? ISD::SETEQ : ISD::SETNE);
17125 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
17126}
17127
17128 // (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
17129 // (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
17130 // (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
17131 // (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
17133 const RISCVSubtarget &Subtarget) {
17134 if (!Subtarget.hasCZEROLike())
17135 return SDValue();
17136
17137 SDValue N0 = N->getOperand(0);
17138 SDValue N1 = N->getOperand(1);
17139
17140 auto IsEqualCompZero = [](SDValue &V) -> bool {
17141 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
17142 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
17144 return true;
17145 }
17146 return false;
17147 };
17148
// Canonicalize so N0 is the single-use compare-with-zero.
17149 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
17150 std::swap(N0, N1);
17151 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
17152 return SDValue();
17153
// The other operand must be boolean-valued (known to be 0 or 1) for the
// AND to be equivalent to a conditional zero.
17154 KnownBits Known = DAG.computeKnownBits(N1);
17155 if (Known.getMaxValue().ugt(1))
17156 return SDValue();
17157
17158 unsigned CzeroOpcode =
17159 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
17160 ? RISCVISD::CZERO_EQZ
17161 : RISCVISD::CZERO_NEZ;
17162
17163 EVT VT = N->getValueType(0);
17164 SDLoc DL(N);
17165 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
17166}
17167
// Fold (and (atomic_load X), mask) into a zero-extending atomic load when
// the mask exactly covers the loaded width, eliminating the separate AND.
17170 SelectionDAG &DAG = DCI.DAG;
17171 if (N->getOpcode() != ISD::AND)
17172 return SDValue();
17173
17174 SDValue N0 = N->getOperand(0);
17175 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
17176 return SDValue();
17177 if (!N0.hasOneUse())
17178 return SDValue();
17179
17182 return SDValue();
17183
17184 EVT LoadedVT = ALoad->getMemoryVT();
17185 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
17186 if (!MaskConst)
17187 return SDValue();
17188 uint64_t Mask = MaskConst->getZExtValue();
// The AND is redundant only if the mask is exactly the loaded width's worth
// of trailing ones.
17189 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
17190 if (Mask != ExpectedMask)
17191 return SDValue();
17192
17193 SDValue ZextLoad = DAG.getAtomicLoad(
17194 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
17195 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
// Replace both the AND's value and the original load's chain output so all
// chain users see the new load.
17196 DCI.CombineTo(N, ZextLoad);
17197 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
17199 return SDValue(N, 0);
17200}
17201
17202 // Sometimes a mask is applied after a shift. If that shift was fed by a
17203 // load, there is sometimes the opportunity to narrow the load, which is
17204 // hidden by the intermediate shift. Detect that case and commute the
17205 // shift/and in order to enable load narrowing.
17207 EVT VT = N->getValueType(0);
17208 if (!VT.isScalarInteger())
17209 return SDValue();
17210
17211 using namespace SDPatternMatch;
17212 SDValue LoadNode;
17213 APInt MaskVal, ShiftVal;
17214 // (and (shl (load ...), ShiftAmt), Mask)
17215 if (!sd_match(
17217 m_ConstInt(ShiftVal))),
17218 m_ConstInt(MaskVal)))) {
17219 return SDValue();
17220 }
17221
17222 uint64_t ShiftAmt = ShiftVal.getZExtValue();
17223
17224 if (ShiftAmt >= VT.getSizeInBits())
17225 return SDValue();
17226
17227 // Calculate the appropriate mask if it were applied before the shift.
17228 APInt InnerMask = MaskVal.lshr(ShiftAmt);
// Narrowing only pays off when the pre-shift mask is a full i8/i16/i32 mask,
// i.e. the load can become a byte/half/word load.
17229 bool IsNarrowable =
17230 InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
17231
17232 if (!IsNarrowable)
17233 return SDValue();
17234
17235 // AND the loaded value and change the shift appropriately, allowing
17236 // the load to be narrowed.
17237 SDLoc DL(N);
17238 SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
17239 DAG.getConstant(InnerMask, DL, VT));
17240 return DAG.getNode(ISD::SHL, DL, VT, InnerAnd,
17241 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
17242}
17243
17244 // Combines two comparison operation and logic operation to one selection
17245 // operation(min, max) and logic operation. Returns new constructed Node if
17246 // conditions for optimization are satisfied.
17250 SelectionDAG &DAG = DCI.DAG;
17251 SDValue N0 = N->getOperand(0);
17252
17253 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
17254 // extending X. This is safe since we only need the LSB after the shift and
17255 // shift amounts larger than 31 would produce poison. If we wait until
17256 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
17257 // to use a BEXT instruction.
17258 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
17259 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
17260 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
17261 N0.hasOneUse()) {
17262 SDLoc DL(N);
17263 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17264 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17265 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
17266 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
17267 DAG.getConstant(1, DL, MVT::i64));
17268 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
17269 }
17270
// Try the remaining AND-specific folds in turn.
17272 return V;
17273 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
17274 return V;
17275 if (DCI.isAfterLegalizeDAG())
17276 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
17277 return V;
17278 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17279 return V;
17280 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17281 return V;
17282 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
17283 return V;
17284
17285 if (DCI.isAfterLegalizeDAG())
17286 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
17287 return V;
17288
17289 // fold (and (select lhs, rhs, cc, -1, y), x) ->
17290 // (select lhs, rhs, cc, x, (and x, y))
17291 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
17292}
17293
17294 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
17295 // FIXME: Generalize to other binary operators with same operand.
17297 SelectionDAG &DAG) {
17298 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
17299
17300 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
17301 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
17302 !N0.hasOneUse() || !N1.hasOneUse())
17303 return SDValue();
17304
17305 // Should have the same condition.
17306 SDValue Cond = N0.getOperand(1);
17307 if (Cond != N1.getOperand(1))
17308 return SDValue();
17309
17310 SDValue TrueV = N0.getOperand(0);
17311 SDValue FalseV = N1.getOperand(0);
17312
// Both arms must be (xor V, 1) with the same constant so the xor can be
// hoisted past the or.
17313 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
17314 TrueV.getOperand(1) != FalseV.getOperand(1) ||
17315 !isOneConstant(TrueV.getOperand(1)) ||
17316 !TrueV.hasOneUse() || !FalseV.hasOneUse())
17317 return SDValue();
17318
17319 EVT VT = N->getValueType(0);
17320 SDLoc DL(N);
17321
// Rebuild the czero pair on the un-xored values; exactly one of them is
// zeroed, so the or of the two is disjoint.
17322 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
17323 Cond);
17324 SDValue NewN1 =
17325 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
17326 SDValue NewOr =
17327 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
17328 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
17329}
17330
17331 // (xor X, (xor (and X, C2), Y))
17332 // ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
17333 // where C2 is a shifted mask with width = Width and shift = ShAmt
17334 // qc_insb might become qc.insb or qc.insbi depending on the operands.
17336 if (!Subtarget.hasVendorXqcibm())
17337 return SDValue();
17338
17339 using namespace SDPatternMatch;
17340 SDValue Base, Inserted;
17341 APInt CMask;
17342 if (!sd_match(N, m_Xor(m_Value(Base),
17345 m_ConstInt(CMask))),
17346 m_Value(Inserted))))))
17347 return SDValue();
17348
// The Xqcibm bitfield-insert instructions operate on i32 only.
17349 if (N->getValueType(0) != MVT::i32)
17350 return SDValue();
17351 unsigned Width, ShAmt;
17352 if (!CMask.isShiftedMask(ShAmt, Width))
17353 return SDValue();
17354
17355 // Check if all zero bits in CMask are also zero in Inserted
17356 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
17357 return SDValue();
17358
17359 SDLoc DL(N);
17360
17361 // `Inserted` needs to be right shifted before it is put into the
17362 // instruction.
17363 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
17364 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
17365
17366 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
17367 DAG.getConstant(ShAmt, DL, MVT::i32)};
17368 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17369}
17370
17372 const RISCVSubtarget &Subtarget) {
17373 if (!Subtarget.hasVendorXqcibm())
17374 return SDValue();
17375
17376 using namespace SDPatternMatch;
17377
17378 SDValue X;
17379 APInt MaskImm;
17380 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
17381 return SDValue();
17382
17383 unsigned ShAmt, Width;
17384 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
17385 return SDValue();
17386
17387 if (N->getValueType(0) != MVT::i32)
17388 return SDValue();
17389
17390 // If Zbs is enabled and it is a single bit set we can use BSETI which
17391 // can be compressed to C_BSETI when Xqcibm in enabled.
17392 if (Width == 1 && Subtarget.hasStdExtZbs())
17393 return SDValue();
17394
17395 // If C1 is a shifted mask (but can't be formed as an ORI),
17396 // use a bitfield insert of -1.
17397 // Transform (or x, C1)
17398 // -> (qc.insbi x, -1, width, shift)
17399 SDLoc DL(N);
17400
17401 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
17402 DAG.getConstant(Width, DL, MVT::i32),
17403 DAG.getConstant(ShAmt, DL, MVT::i32)};
17404 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17405}
17406
17407// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
17408// being inserted only sets known zero bits.
17410 const RISCVSubtarget &Subtarget) {
17411 // Supported only in Xqcibm for now.
17412 if (!Subtarget.hasVendorXqcibm())
17413 return SDValue();
17414
17415 using namespace SDPatternMatch;
17416
17417 SDValue Inserted;
17418 APInt MaskImm, OrImm;
17419 if (!sd_match(
17420 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
17421 m_ConstInt(MaskImm))),
17422 m_ConstInt(OrImm)))))
17423 return SDValue();
17424
17425 // Compute the Known Zero for the AND as this allows us to catch more general
17426 // cases than just looking for AND with imm.
17427 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
17428
17429 // The bits being inserted must only set those bits that are known to be
17430 // zero.
17431 if (!OrImm.isSubsetOf(Known.Zero)) {
17432 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
17433 // currently handle this case.
17434 return SDValue();
17435 }
17436
17437 unsigned ShAmt, Width;
17438 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
17439 if (!Known.Zero.isShiftedMask(ShAmt, Width))
17440 return SDValue();
17441
17442 // QC_INSB(I) dst, src, #width, #shamt.
17443 SDLoc DL(N);
17444
17445 SDValue ImmNode =
17446 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
17447
17448 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
17449 DAG.getConstant(ShAmt, DL, MVT::i32)};
17450 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17451}
17452
17454 const RISCVSubtarget &Subtarget) {
17455 SelectionDAG &DAG = DCI.DAG;
17456
17457 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
17458 return V;
17459 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17460 return V;
17461 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17462 return V;
17463
17464 if (DCI.isAfterLegalizeDAG()) {
17465 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
17466 return V;
17467 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
17468 return V;
17469 }
17470
17471 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
17472 // We may be able to pull a common operation out of the true and false value.
17473 SDValue N0 = N->getOperand(0);
17474 SDValue N1 = N->getOperand(1);
17475 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
17476 return V;
17477 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
17478 return V;
17479
17480 // fold (or (select cond, 0, y), x) ->
17481 // (select cond, x, (or x, y))
17482 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17483}
17484
17486 const RISCVSubtarget &Subtarget) {
17487 SDValue N0 = N->getOperand(0);
17488 SDValue N1 = N->getOperand(1);
17489
17490 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
17491 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
17492 // RISCVISD:::SLLW and we can't recover it to use a BSET instruction.
17493 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
17494 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
17495 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
17496 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
17497 SDLoc DL(N);
17498 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17499 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17500 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
17501 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
17502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
17503 }
17504
17505 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
17506 // NOTE: Assumes ROL being legal means ROLW is legal.
17507 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17508 if (N0.getOpcode() == RISCVISD::SLLW &&
17510 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
17511 SDLoc DL(N);
17512 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
17513 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
17514 }
17515
17516 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
17517 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
17518 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
17520 if (ConstN00 && CC == ISD::SETLT) {
17521 EVT VT = N0.getValueType();
17522 SDLoc DL(N0);
17523 const APInt &Imm = ConstN00->getAPIntValue();
17524 if ((Imm + 1).isSignedIntN(12))
17525 return DAG.getSetCC(DL, VT, N0.getOperand(1),
17526 DAG.getConstant(Imm + 1, DL, VT), CC);
17527 }
17528 }
17529
17530 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
17531 return V;
17532
17533 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17534 return V;
17535 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17536 return V;
17537
17538 // fold (xor (select cond, 0, y), x) ->
17539 // (select cond, x, (xor x, y))
17540 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17541}
17542
17543// Try to expand a multiply to a sequence of shifts and add/subs,
17544// for a machine without native mul instruction.
17546 uint64_t MulAmt) {
17547 SDLoc DL(N);
17548 EVT VT = N->getValueType(0);
17550
17551 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
17552 SDValue N0 = N->getOperand(0);
17553
17554 // Find the Non-adjacent form of the multiplier.
17555 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
17556 if (E & 1) {
17557 bool IsAdd = (E & 3) == 1;
17558 E -= IsAdd ? 1 : -1;
17559 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
17560 DAG.getShiftAmountConstant(I, VT, DL));
17561 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
17562 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
17563 }
17564 }
17565
17566 return Result;
17567}
17568
17569// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
17571 uint64_t MulAmt) {
17572 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
17573 SDValue X = N->getOperand(0);
17575 uint64_t ShiftAmt1;
17576 bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
17577 auto PreferSub = [X, MulAmtLowBit]() {
17578 // For MulAmt == 3 << M both (X << M + 2) - (X << M)
17579 // and (X << M + 1) + (X << M) are valid expansions.
17580 // Prefer SUB if we can get (X << M + 2) for free,
17581 // because X is exact (Y >> M + 2).
17582 uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
17583 using namespace SDPatternMatch;
17584 return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
17585 };
17586 if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
17587 Op = ISD::ADD;
17588 ShiftAmt1 = MulAmt - MulAmtLowBit;
17589 } else if (CanSub) {
17590 Op = ISD::SUB;
17591 ShiftAmt1 = MulAmt + MulAmtLowBit;
17592 } else {
17593 return SDValue();
17594 }
17595 EVT VT = N->getValueType(0);
17596 SDLoc DL(N);
17597 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
17598 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
17599 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
17600 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
17601 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
17602}
17603
17604static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
17605 unsigned ShY, bool AddX, unsigned Shift) {
17606 SDLoc DL(N);
17607 EVT VT = N->getValueType(0);
17608 SDValue X = N->getOperand(0);
17609 // Put the shift first if we can fold:
17610 // a. a zext into the shift forming a slli.uw
17611 // b. an exact shift right forming one shorter shift or no shift at all
17612 using namespace SDPatternMatch;
17613 if (Shift != 0 &&
17614 sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
17615 m_ExactSr(m_Value(), m_ConstInt())))) {
17616 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
17617 Shift = 0;
17618 }
17619 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17620 DAG.getTargetConstant(ShY, DL, VT), X);
17621 if (ShX != 0)
17622 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
17623 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
17624 if (Shift == 0)
17625 return ShlAdd;
17626 // Otherwise, put the shl last so that it can fold with following instructions
17627 // (e.g. sext or add).
17628 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
17629}
17630
17632 uint64_t MulAmt, unsigned Shift) {
17633 switch (MulAmt) {
17634 // 3/5/9 -> (shYadd X, X)
17635 case 3:
17636 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
17637 case 5:
17638 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
17639 case 9:
17640 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
17641 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
17642 case 5 * 3:
17643 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
17644 case 9 * 3:
17645 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
17646 case 5 * 5:
17647 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
17648 case 9 * 5:
17649 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
17650 case 9 * 9:
17651 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
17652 default:
17653 break;
17654 }
17655
17656 int ShX;
17657 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
17658 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
17659 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
17660 if (ShX <= 3)
17661 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
17662 // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
17663 if (Shift == 0) {
17664 SDLoc DL(N);
17665 EVT VT = N->getValueType(0);
17666 SDValue X = N->getOperand(0);
17667 SDValue Shl =
17668 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
17669 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
17670 DAG.getTargetConstant(ShY, DL, VT), Shl);
17671 return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
17672 }
17673 }
17674 return SDValue();
17675}
17676
17677// Try to expand a scalar multiply to a faster sequence.
17680 const RISCVSubtarget &Subtarget) {
17681
17682 EVT VT = N->getValueType(0);
17683
17684 // LI + MUL is usually smaller than the alternative sequence.
17686 return SDValue();
17687
17688 if (VT != Subtarget.getXLenVT())
17689 return SDValue();
17690
17691 bool ShouldExpandMul =
17692 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
17693 !Subtarget.hasStdExtZmmul();
17694 if (!ShouldExpandMul)
17695 return SDValue();
17696
17697 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17698 if (!CNode)
17699 return SDValue();
17700 uint64_t MulAmt = CNode->getZExtValue();
17701
17702 // Don't do this if the Xqciac extension is enabled and the MulAmt in simm12.
17703 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
17704 return SDValue();
17705
17706 // WARNING: The code below is knowingly incorrect with regards to undef
17707 // semantics. We're adding additional uses of X here, and in principle, we
17708 // should be freezing X before doing so. However, adding freeze here causes
17709 // real regressions, and no other target properly freezes X in these cases
17710 // either.
17711 if (Subtarget.hasShlAdd(3)) {
17712 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
17713 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
17714 // of 25 which happen to be quite common.
17715 // (2/4/8 * 3/5/9 + 1) * 2^N
17716 unsigned Shift = llvm::countr_zero(MulAmt);
17717 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
17718 return V;
17719
17720 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
17721 // shXadd. First check if this a sum of two power of 2s because that's
17722 // easy. Then count how many zeros are up to the first bit.
17723 SDValue X = N->getOperand(0);
17724 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
17725 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
17726 SDLoc DL(N);
17727 SDValue Shift1 =
17728 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17729 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17730 DAG.getTargetConstant(Shift, DL, VT), Shift1);
17731 }
17732
17733 // TODO: 2^(C1>3) * 3/5/9 - 1
17734
17735 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
17736 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
17737 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
17738 if (ScaleShift >= 1 && ScaleShift < 4) {
17739 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
17740 SDLoc DL(N);
17741 SDValue Shift1 =
17742 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17743 return DAG.getNode(
17744 ISD::ADD, DL, VT, Shift1,
17745 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17746 DAG.getTargetConstant(ScaleShift, DL, VT), X));
17747 }
17748 }
17749
17750 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
17751 for (uint64_t Offset : {3, 5, 9}) {
17752 if (isPowerOf2_64(MulAmt + Offset)) {
17753 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
17754 if (ShAmt >= VT.getSizeInBits())
17755 continue;
17756 SDLoc DL(N);
17757 SDValue Shift1 =
17758 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
17759 SDValue Mul359 =
17760 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17761 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
17762 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
17763 }
17764 }
17765 }
17766
17767 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
17768 return V;
17769
17770 if (!Subtarget.hasStdExtZmmul())
17771 return expandMulToNAFSequence(N, DAG, MulAmt);
17772
17773 return SDValue();
17774}
17775
17776// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
17777// (bitcast (sra (v2Xi16 (bitcast X)), 15))
17778// Same for other equivalent types with other equivalent constants.
17780 EVT VT = N->getValueType(0);
17781 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17782
17783 // Do this for legal vectors unless they are i1 or i8 vectors.
17784 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
17785 return SDValue();
17786
17787 if (N->getOperand(0).getOpcode() != ISD::AND ||
17788 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
17789 return SDValue();
17790
17791 SDValue And = N->getOperand(0);
17792 SDValue Srl = And.getOperand(0);
17793
17794 APInt V1, V2, V3;
17795 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
17796 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
17798 return SDValue();
17799
17800 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
17801 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
17802 V3 != (HalfSize - 1))
17803 return SDValue();
17804
17805 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
17806 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
17807 VT.getVectorElementCount() * 2);
17808 SDLoc DL(N);
17809 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
17810 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
17811 DAG.getConstant(HalfSize - 1, DL, HalfVT));
17812 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
17813}
17814
17817 const RISCVSubtarget &Subtarget) {
17818 EVT VT = N->getValueType(0);
17819 if (!VT.isVector())
17820 return expandMul(N, DAG, DCI, Subtarget);
17821
17822 SDLoc DL(N);
17823 SDValue N0 = N->getOperand(0);
17824 SDValue N1 = N->getOperand(1);
17825 SDValue MulOper;
17826 unsigned AddSubOpc;
17827
17828 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
17829 // (mul x, add (y, 1)) -> (add x, (mul x, y))
17830 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
17831 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
17832 auto IsAddSubWith1 = [&](SDValue V) -> bool {
17833 AddSubOpc = V->getOpcode();
17834 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
17835 SDValue Opnd = V->getOperand(1);
17836 MulOper = V->getOperand(0);
17837 if (AddSubOpc == ISD::SUB)
17838 std::swap(Opnd, MulOper);
17839 if (isOneOrOneSplat(Opnd))
17840 return true;
17841 }
17842 return false;
17843 };
17844
17845 if (IsAddSubWith1(N0)) {
17846 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
17847 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
17848 }
17849
17850 if (IsAddSubWith1(N1)) {
17851 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
17852 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
17853 }
17854
17855 if (SDValue V = combineBinOpOfZExt(N, DAG))
17856 return V;
17857
17859 return V;
17860
17861 return SDValue();
17862}
17863
17864/// According to the property that indexed load/store instructions zero-extend
17865/// their indices, try to narrow the type of index operand.
17866static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
17867 if (isIndexTypeSigned(IndexType))
17868 return false;
17869
17870 if (!N->hasOneUse())
17871 return false;
17872
17873 EVT VT = N.getValueType();
17874 SDLoc DL(N);
17875
17876 // In general, what we're doing here is seeing if we can sink a truncate to
17877 // a smaller element type into the expression tree building our index.
17878 // TODO: We can generalize this and handle a bunch more cases if useful.
17879
17880 // Narrow a buildvector to the narrowest element type. This requires less
17881 // work and less register pressure at high LMUL, and creates smaller constants
17882 // which may be cheaper to materialize.
17883 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
17884 KnownBits Known = DAG.computeKnownBits(N);
17885 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
17886 LLVMContext &C = *DAG.getContext();
17887 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
17888 if (ResultVT.bitsLT(VT.getVectorElementType())) {
17889 N = DAG.getNode(ISD::TRUNCATE, DL,
17890 VT.changeVectorElementType(C, ResultVT), N);
17891 return true;
17892 }
17893 }
17894
17895 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
17896 if (N.getOpcode() != ISD::SHL)
17897 return false;
17898
17899 SDValue N0 = N.getOperand(0);
17900 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
17901 N0.getOpcode() != RISCVISD::VZEXT_VL)
17902 return false;
17903 if (!N0->hasOneUse())
17904 return false;
17905
17906 APInt ShAmt;
17907 SDValue N1 = N.getOperand(1);
17908 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
17909 return false;
17910
17911 SDValue Src = N0.getOperand(0);
17912 EVT SrcVT = Src.getValueType();
17913 unsigned SrcElen = SrcVT.getScalarSizeInBits();
17914 unsigned ShAmtV = ShAmt.getZExtValue();
17915 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
17916 NewElen = std::max(NewElen, 8U);
17917
17918 // Skip if NewElen is not narrower than the original extended type.
17919 if (NewElen >= N0.getValueType().getScalarSizeInBits())
17920 return false;
17921
17922 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
17923 EVT NewVT = SrcVT.changeVectorElementType(*DAG.getContext(), NewEltVT);
17924
17925 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
17926 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
17927 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
17928 return true;
17929}
17930
17931/// Try to map an integer comparison with size > XLEN to vector instructions
17932/// before type legalization splits it up into chunks.
17933static SDValue
17935 const SDLoc &DL, SelectionDAG &DAG,
17936 const RISCVSubtarget &Subtarget) {
17937 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
17938
17939 if (!Subtarget.hasVInstructions())
17940 return SDValue();
17941
17942 MVT XLenVT = Subtarget.getXLenVT();
17943 EVT OpVT = X.getValueType();
17944 // We're looking for an oversized integer equality comparison.
17945 if (!OpVT.isScalarInteger())
17946 return SDValue();
17947
17948 unsigned OpSize = OpVT.getSizeInBits();
17949 // The size should be larger than XLen and smaller than the maximum vector
17950 // size.
17951 if (OpSize <= Subtarget.getXLen() ||
17952 OpSize > Subtarget.getRealMinVLen() *
17954 return SDValue();
17955
17956 // Don't perform this combine if constructing the vector will be expensive.
17957 auto IsVectorBitCastCheap = [](SDValue X) {
17959 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
17960 X.getOpcode() == ISD::LOAD;
17961 };
17962 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
17963 return SDValue();
17964
17966 Attribute::NoImplicitFloat))
17967 return SDValue();
17968
17969 // Bail out for non-byte-sized types.
17970 if (!OpVT.isByteSized())
17971 return SDValue();
17972
17973 unsigned VecSize = OpSize / 8;
17974 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
17975 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
17976
17977 SDValue VecX = DAG.getBitcast(VecVT, X);
17978 SDValue VecY = DAG.getBitcast(VecVT, Y);
17979 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
17980 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
17981
17982 SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
17983 return DAG.getSetCC(DL, VT,
17984 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
17985 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
17986 VL),
17987 DAG.getConstant(0, DL, XLenVT), CC);
17988}
17989
17992 const RISCVSubtarget &Subtarget) {
17993 SelectionDAG &DAG = DCI.DAG;
17994 SDLoc dl(N);
17995 SDValue N0 = N->getOperand(0);
17996 SDValue N1 = N->getOperand(1);
17997 EVT VT = N->getValueType(0);
17998 EVT OpVT = N0.getValueType();
17999
18000 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
18001 // Looking for an equality compare.
18002 if (!isIntEqualitySetCC(Cond))
18003 return SDValue();
18004
18005 if (SDValue V =
18006 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
18007 return V;
18008
18009 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
18010 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
18012 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
18013 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
18014 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
18015 AndRHSC.isNegatedPowerOf2()) {
18016 unsigned ShiftBits = AndRHSC.countr_zero();
18017 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
18018 DAG.getConstant(ShiftBits, dl, OpVT));
18019 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
18020 }
18021
18022 // Similar to above but handling the lower 32 bits by using sraiw. Allow
18023 // comparing with constants other than 0 if the constant can be folded into
18024 // addi or xori after shifting.
18025 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
18026 uint64_t AndRHSInt = AndRHSC.getZExtValue();
18027 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
18028 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
18029 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
18030 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
18031 if (NewC >= -2048 && NewC <= 2048) {
18032 SDValue SExt =
18033 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
18034 DAG.getValueType(MVT::i32));
18035 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
18036 DAG.getConstant(ShiftBits, dl, OpVT));
18037 return DAG.getSetCC(dl, VT, Shift,
18038 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
18039 }
18040 }
18041
18042 // Fold (and X, Mask) ==/!= C -> X ==/!= sext(C, countr_one(Mask)) if the
18043 // Mask is only clearing redundant sign bits.
18044 if (isMask_64(AndRHSInt)) {
18045 unsigned TrailingOnes = llvm::countr_one(AndRHSInt);
18046 unsigned N1Width = llvm::bit_width(N1Int);
18047 int64_t N1SExt = SignExtend64(N1Int, TrailingOnes);
18048 if (N1Width <= TrailingOnes && isInt<12>(N1SExt) &&
18049 DAG.ComputeMaxSignificantBits(N0.getOperand(0)) <= TrailingOnes)
18050 return DAG.getSetCC(dl, VT, N0.getOperand(0),
18051 DAG.getSignedConstant(N1SExt, dl, OpVT), Cond);
18052 }
18053 }
18054
18055 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
18056 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
18057 // bit 31. Same for setne. C1' may be cheaper to materialize and the
18058 // sext_inreg can become a sext.w instead of a shift pair.
18059 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
18060 return SDValue();
18061
18062 // RHS needs to be a constant.
18063 auto *N1C = dyn_cast<ConstantSDNode>(N1);
18064 if (!N1C)
18065 return SDValue();
18066
18067 // LHS needs to be (and X, 0xffffffff).
18068 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
18070 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
18071 return SDValue();
18072
18073 // Don't do this if the sign bit is provably zero, it will be turned back into
18074 // an AND.
18075 APInt SignMask = APInt::getOneBitSet(64, 31);
18076 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
18077 return SDValue();
18078
18079 const APInt &C1 = N1C->getAPIntValue();
18080
18081 // If the constant is larger than 2^32 - 1 it is impossible for both sides
18082 // to be equal.
18083 if (C1.getActiveBits() > 32)
18084 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
18085
18086 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
18087 N0.getOperand(0), DAG.getValueType(MVT::i32));
18088 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
18089 dl, OpVT), Cond);
18090}
18091
18092static SDValue
18094 const RISCVSubtarget &Subtarget) {
18095 SelectionDAG &DAG = DCI.DAG;
18096 SDValue Src = N->getOperand(0);
18097 EVT VT = N->getValueType(0);
18098 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18099 unsigned Opc = Src.getOpcode();
18100 SDLoc DL(N);
18101
18102 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
18103 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
18104 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
18105 Subtarget.hasStdExtZfhmin())
18106 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
18107
18108 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
18109 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
18110 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
18111 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
18112 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
18113 Src.getOperand(1));
18114
18115 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
18116 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
18117 return DAG.getNegative(Src, DL, VT);
18118
18119 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
18120 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
18121 isAllOnesConstant(Src.getOperand(1)) &&
18122 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
18123 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
18124 DAG.getAllOnesConstant(DL, VT));
18125
18126 return SDValue();
18127}
18128
18129namespace {
18130// Forward declaration of the structure holding the necessary information to
18131// apply a combine.
18132struct CombineResult;
18133
// Bit flags describing which kinds of extension an operand supports; the
// flags are combined as a bitmask.
enum ExtKind : uint8_t {
  ZExt = 1 << 0,    // zero extension
  SExt = 1 << 1,    // sign extension
  FPExt = 1 << 2,   // floating-point extension
  BF16Ext = 1 << 3  // extension from bf16
};
18140/// Helper class for folding sign/zero extensions.
18141/// In particular, this class is used for the following combines:
18142/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
18143/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18144/// mul | mul_vl -> vwmul(u) | vwmul_su
18145/// shl | shl_vl -> vwsll
18146/// fadd -> vfwadd | vfwadd_w
18147/// fsub -> vfwsub | vfwsub_w
18148/// fmul -> vfwmul
18149/// An object of this class represents an operand of the operation we want to
18150/// combine.
18151/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
18152/// NodeExtensionHelper for `a` and one for `b`.
18153///
18154/// This class abstracts away how the extension is materialized and
18155/// how its number of users affect the combines.
18156///
18157/// In particular:
18158/// - VWADD_W is conceptually == add(op0, sext(op1))
18159/// - VWADDU_W == add(op0, zext(op1))
18160/// - VWSUB_W == sub(op0, sext(op1))
18161/// - VWSUBU_W == sub(op0, zext(op1))
18162/// - VFWADD_W == fadd(op0, fpext(op1))
18163/// - VFWSUB_W == fsub(op0, fpext(op1))
18164/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
18165/// zext|sext(smaller_value).
18166struct NodeExtensionHelper {
18167 /// Records if this operand is like being zero extended.
18168 bool SupportsZExt;
18169 /// Records if this operand is like being sign extended.
18170 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
18171 /// instance, a splat constant (e.g., 3), would support being both sign and
18172 /// zero extended.
18173 bool SupportsSExt;
18174 /// Records if this operand is like being floating point extended.
18175 bool SupportsFPExt;
18176 /// Records if this operand is extended from bf16.
18177 bool SupportsBF16Ext;
18178 /// This boolean captures whether we care if this operand would still be
18179 /// around after the folding happens.
18180 bool EnforceOneUse;
18181 /// Original value that this NodeExtensionHelper represents.
18182 SDValue OrigOperand;
18183
18184 /// Get the value feeding the extension or the value itself.
18185 /// E.g., for zext(a), this would return a.
18186 SDValue getSource() const {
18187 switch (OrigOperand.getOpcode()) {
18188 case ISD::ZERO_EXTEND:
18189 case ISD::SIGN_EXTEND:
18190 case RISCVISD::VSEXT_VL:
18191 case RISCVISD::VZEXT_VL:
18192 case RISCVISD::FP_EXTEND_VL:
18193 return OrigOperand.getOperand(0);
18194 default:
18195 return OrigOperand;
18196 }
18197 }
18198
18199 /// Check if this instance represents a splat.
18200 bool isSplat() const {
18201 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
18202 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
18203 }
18204
18205 /// Get the extended opcode.
18206 unsigned getExtOpc(ExtKind SupportsExt) const {
18207 switch (SupportsExt) {
18208 case ExtKind::SExt:
18209 return RISCVISD::VSEXT_VL;
18210 case ExtKind::ZExt:
18211 return RISCVISD::VZEXT_VL;
18212 case ExtKind::FPExt:
18213 case ExtKind::BF16Ext:
18214 return RISCVISD::FP_EXTEND_VL;
18215 }
18216 llvm_unreachable("Unknown ExtKind enum");
18217 }
18218
  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
  SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget,
                                std::optional<ExtKind> SupportsExt) const {
    // No extension requested: hand the operand back untouched.
    if (!SupportsExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root, *SupportsExt);

    SDValue Source = getSource();
    assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
    // Already at the width the widening operation consumes; nothing to build.
    if (Source.getValueType() == NarrowVT)
      return Source;

    unsigned ExtOpc = getExtOpc(*SupportsExt);

    // If we need an extension, we should be changing the type.
    SDLoc DL(OrigOperand);
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
    case RISCVISD::FP_EXTEND_VL:
      // Re-extend the underlying source directly to the narrow type using the
      // VL form of the extension.
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case ISD::SPLAT_VECTOR:
      // Rebuild the splat at the narrow element type.
      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
    case RISCVISD::VMV_V_X_VL:
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
    case RISCVISD::VFMV_V_F_VL:
      // Peel off the scalar splat value and the fp_extend it carries, then
      // rebuild the splat at the narrow element type.
      Source = Source.getOperand(1);
      assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
      Source = Source.getOperand(0);
      assert(Source.getValueType() == NarrowVT.getVectorElementType());
      return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source, VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }
18266
18267 /// Helper function to get the narrow type for \p Root.
18268 /// The narrow type is the type of \p Root where we divided the size of each
18269 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
18270 /// \pre Both the narrow type and the original type should be legal.
18271 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
18272 MVT VT = Root->getSimpleValueType(0);
18273
18274 // Determine the narrow size.
18275 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
18276
18277 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
18278 : SupportsExt == ExtKind::FPExt
18279 ? MVT::getFloatingPointVT(NarrowSize)
18280 : MVT::getIntegerVT(NarrowSize);
18281
18282 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
18283 "Trying to extend something we can't represent");
18284 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
18285 return NarrowVT;
18286 }
18287
18288 /// Get the opcode to materialize:
18289 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
18290 static unsigned getSExtOpcode(unsigned Opcode) {
18291 switch (Opcode) {
18292 case ISD::ADD:
18293 case RISCVISD::ADD_VL:
18294 case RISCVISD::VWADD_W_VL:
18295 case RISCVISD::VWADDU_W_VL:
18296 case ISD::OR:
18297 case RISCVISD::OR_VL:
18298 return RISCVISD::VWADD_VL;
18299 case ISD::SUB:
18300 case RISCVISD::SUB_VL:
18301 case RISCVISD::VWSUB_W_VL:
18302 case RISCVISD::VWSUBU_W_VL:
18303 return RISCVISD::VWSUB_VL;
18304 case ISD::MUL:
18305 case RISCVISD::MUL_VL:
18306 return RISCVISD::VWMUL_VL;
18307 default:
18308 llvm_unreachable("Unexpected opcode");
18309 }
18310 }
18311
18312 /// Get the opcode to materialize:
18313 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
18314 static unsigned getZExtOpcode(unsigned Opcode) {
18315 switch (Opcode) {
18316 case ISD::ADD:
18317 case RISCVISD::ADD_VL:
18318 case RISCVISD::VWADD_W_VL:
18319 case RISCVISD::VWADDU_W_VL:
18320 case ISD::OR:
18321 case RISCVISD::OR_VL:
18322 return RISCVISD::VWADDU_VL;
18323 case ISD::SUB:
18324 case RISCVISD::SUB_VL:
18325 case RISCVISD::VWSUB_W_VL:
18326 case RISCVISD::VWSUBU_W_VL:
18327 return RISCVISD::VWSUBU_VL;
18328 case ISD::MUL:
18329 case RISCVISD::MUL_VL:
18330 return RISCVISD::VWMULU_VL;
18331 case ISD::SHL:
18332 case RISCVISD::SHL_VL:
18333 return RISCVISD::VWSLL_VL;
18334 default:
18335 llvm_unreachable("Unexpected opcode");
18336 }
18337 }
18338
18339 /// Get the opcode to materialize:
18340 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
18341 static unsigned getFPExtOpcode(unsigned Opcode) {
18342 switch (Opcode) {
18343 case RISCVISD::FADD_VL:
18344 case RISCVISD::VFWADD_W_VL:
18345 return RISCVISD::VFWADD_VL;
18346 case RISCVISD::FSUB_VL:
18347 case RISCVISD::VFWSUB_W_VL:
18348 return RISCVISD::VFWSUB_VL;
18349 case RISCVISD::FMUL_VL:
18350 return RISCVISD::VFWMUL_VL;
18351 case RISCVISD::VFMADD_VL:
18352 return RISCVISD::VFWMADD_VL;
18353 case RISCVISD::VFMSUB_VL:
18354 return RISCVISD::VFWMSUB_VL;
18355 case RISCVISD::VFNMADD_VL:
18356 return RISCVISD::VFWNMADD_VL;
18357 case RISCVISD::VFNMSUB_VL:
18358 return RISCVISD::VFWNMSUB_VL;
18359 default:
18360 llvm_unreachable("Unexpected opcode");
18361 }
18362 }
18363
18364 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
18365 /// newOpcode(a, b).
18366 static unsigned getSUOpcode(unsigned Opcode) {
18367 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
18368 "SU is only supported for MUL");
18369 return RISCVISD::VWMULSU_VL;
18370 }
18371
18372 /// Get the opcode to materialize
18373 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
18374 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
18375 switch (Opcode) {
18376 case ISD::ADD:
18377 case RISCVISD::ADD_VL:
18378 case ISD::OR:
18379 case RISCVISD::OR_VL:
18380 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
18381 : RISCVISD::VWADDU_W_VL;
18382 case ISD::SUB:
18383 case RISCVISD::SUB_VL:
18384 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
18385 : RISCVISD::VWSUBU_W_VL;
18386 case RISCVISD::FADD_VL:
18387 return RISCVISD::VFWADD_W_VL;
18388 case RISCVISD::FSUB_VL:
18389 return RISCVISD::VFWSUB_W_VL;
18390 default:
18391 llvm_unreachable("Unexpected opcode");
18392 }
18393 }
18394
18395 using CombineToTry = std::function<std::optional<CombineResult>(
18396 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
18397 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
18398 const RISCVSubtarget &)>;
18399
18400 /// Check if this node needs to be fully folded or extended for all users.
18401 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
18402
18403 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
18404 const RISCVSubtarget &Subtarget) {
18405 unsigned Opc = OrigOperand.getOpcode();
18406 MVT VT = OrigOperand.getSimpleValueType();
18407
18408 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
18409 "Unexpected Opcode");
18410
18411 // The pasthru must be undef for tail agnostic.
18412 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
18413 return;
18414
18415 // Get the scalar value.
18416 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
18417 : OrigOperand.getOperand(1);
18418
18419 // See if we have enough sign bits or zero bits in the scalar to use a
18420 // widening opcode by splatting to smaller element size.
18421 unsigned EltBits = VT.getScalarSizeInBits();
18422 unsigned ScalarBits = Op.getValueSizeInBits();
18423 // If we're not getting all bits from the element, we need special handling.
18424 if (ScalarBits < EltBits) {
18425 // This should only occur on RV32.
18426 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
18427 !Subtarget.is64Bit() && "Unexpected splat");
18428 // vmv.v.x sign extends narrow inputs.
18429 SupportsSExt = true;
18430
18431 // If the input is positive, then sign extend is also zero extend.
18432 if (DAG.SignBitIsZero(Op))
18433 SupportsZExt = true;
18434
18435 EnforceOneUse = false;
18436 return;
18437 }
18438
18439 unsigned NarrowSize = EltBits / 2;
18440 // If the narrow type cannot be expressed with a legal VMV,
18441 // this is not a valid candidate.
18442 if (NarrowSize < 8)
18443 return;
18444
18445 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
18446 SupportsSExt = true;
18447
18448 if (DAG.MaskedValueIsZero(Op,
18449 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
18450 SupportsZExt = true;
18451
18452 EnforceOneUse = false;
18453 }
18454
18455 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
18456 return (NarrowEltVT == MVT::f32 ||
18457 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
18458 }
18459
18460 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
18461 return NarrowEltVT == MVT::bf16 &&
18462 (Subtarget.hasStdExtZvfbfwma() || Subtarget.hasVInstructionsBF16());
18463 }
18464
18465 /// Helper method to set the various fields of this struct based on the
18466 /// type of \p Root.
18467 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
18468 const RISCVSubtarget &Subtarget) {
18469 SupportsZExt = false;
18470 SupportsSExt = false;
18471 SupportsFPExt = false;
18472 SupportsBF16Ext = false;
18473 EnforceOneUse = true;
18474 unsigned Opc = OrigOperand.getOpcode();
18475 // For the nodes we handle below, we end up using their inputs directly: see
18476 // getSource(). However since they either don't have a passthru or we check
18477 // that their passthru is undef, we can safely ignore their mask and VL.
18478 switch (Opc) {
18479 case ISD::ZERO_EXTEND:
18480 case ISD::SIGN_EXTEND: {
18481 MVT VT = OrigOperand.getSimpleValueType();
18482 if (!VT.isVector())
18483 break;
18484
18485 SDValue NarrowElt = OrigOperand.getOperand(0);
18486 MVT NarrowVT = NarrowElt.getSimpleValueType();
18487 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
18488 if (NarrowVT.getVectorElementType() == MVT::i1)
18489 break;
18490
18491 SupportsZExt = Opc == ISD::ZERO_EXTEND;
18492 SupportsSExt = Opc == ISD::SIGN_EXTEND;
18493 break;
18494 }
18495 case RISCVISD::VZEXT_VL:
18496 SupportsZExt = true;
18497 break;
18498 case RISCVISD::VSEXT_VL:
18499 SupportsSExt = true;
18500 break;
18501 case RISCVISD::FP_EXTEND_VL: {
18502 MVT NarrowEltVT =
18504 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
18505 SupportsFPExt = true;
18506 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
18507 SupportsBF16Ext = true;
18508
18509 break;
18510 }
18511 case ISD::SPLAT_VECTOR:
18512 case RISCVISD::VMV_V_X_VL:
18513 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
18514 break;
18515 case RISCVISD::VFMV_V_F_VL: {
18516 MVT VT = OrigOperand.getSimpleValueType();
18517
18518 if (!OrigOperand.getOperand(0).isUndef())
18519 break;
18520
18521 SDValue Op = OrigOperand.getOperand(1);
18522 if (Op.getOpcode() != ISD::FP_EXTEND)
18523 break;
18524
18525 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
18526 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
18527 if (NarrowSize != ScalarBits)
18528 break;
18529
18530 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
18531 SupportsFPExt = true;
18532 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
18533 Subtarget))
18534 SupportsBF16Ext = true;
18535 break;
18536 }
18537 default:
18538 break;
18539 }
18540 }
18541
18542 /// Check if \p Root supports any extension folding combines.
18543 static bool isSupportedRoot(const SDNode *Root,
18544 const RISCVSubtarget &Subtarget) {
18545 switch (Root->getOpcode()) {
18546 case ISD::ADD:
18547 case ISD::SUB:
18548 case ISD::MUL: {
18549 return Root->getValueType(0).isScalableVector();
18550 }
18551 case ISD::OR: {
18552 return Root->getValueType(0).isScalableVector() &&
18553 Root->getFlags().hasDisjoint();
18554 }
18555 // Vector Widening Integer Add/Sub/Mul Instructions
18556 case RISCVISD::ADD_VL:
18557 case RISCVISD::MUL_VL:
18558 case RISCVISD::VWADD_W_VL:
18559 case RISCVISD::VWADDU_W_VL:
18560 case RISCVISD::SUB_VL:
18561 case RISCVISD::VWSUB_W_VL:
18562 case RISCVISD::VWSUBU_W_VL:
18563 // Vector Widening Floating-Point Add/Sub/Mul Instructions
18564 case RISCVISD::FADD_VL:
18565 case RISCVISD::FSUB_VL:
18566 case RISCVISD::FMUL_VL:
18567 case RISCVISD::VFWADD_W_VL:
18568 case RISCVISD::VFWSUB_W_VL:
18569 return true;
18570 case RISCVISD::OR_VL:
18571 return Root->getFlags().hasDisjoint();
18572 case ISD::SHL:
18573 return Root->getValueType(0).isScalableVector() &&
18574 Subtarget.hasStdExtZvbb();
18575 case RISCVISD::SHL_VL:
18576 return Subtarget.hasStdExtZvbb();
18577 case RISCVISD::VFMADD_VL:
18578 case RISCVISD::VFNMSUB_VL:
18579 case RISCVISD::VFNMADD_VL:
18580 case RISCVISD::VFMSUB_VL:
18581 return true;
18582 default:
18583 return false;
18584 }
18585 }
18586
18587 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
18588 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
18589 const RISCVSubtarget &Subtarget) {
18590 assert(isSupportedRoot(Root, Subtarget) &&
18591 "Trying to build an helper with an "
18592 "unsupported root");
18593 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
18595 OrigOperand = Root->getOperand(OperandIdx);
18596
18597 unsigned Opc = Root->getOpcode();
18598 switch (Opc) {
18599 // We consider
18600 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
18601 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
18602 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
18603 case RISCVISD::VWADD_W_VL:
18604 case RISCVISD::VWADDU_W_VL:
18605 case RISCVISD::VWSUB_W_VL:
18606 case RISCVISD::VWSUBU_W_VL:
18607 case RISCVISD::VFWADD_W_VL:
18608 case RISCVISD::VFWSUB_W_VL:
18609 // Operand 1 can't be changed.
18610 if (OperandIdx == 1)
18611 break;
18612 [[fallthrough]];
18613 default:
18614 fillUpExtensionSupport(Root, DAG, Subtarget);
18615 break;
18616 }
18617 }
18618
18619 /// Helper function to get the Mask and VL from \p Root.
18620 static std::pair<SDValue, SDValue>
18621 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
18622 const RISCVSubtarget &Subtarget) {
18623 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
18624 switch (Root->getOpcode()) {
18625 case ISD::ADD:
18626 case ISD::SUB:
18627 case ISD::MUL:
18628 case ISD::OR:
18629 case ISD::SHL: {
18630 SDLoc DL(Root);
18631 MVT VT = Root->getSimpleValueType(0);
18632 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
18633 }
18634 default:
18635 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
18636 }
18637 }
18638
18639 /// Helper function to check if \p N is commutative with respect to the
18640 /// foldings that are supported by this class.
18641 static bool isCommutative(const SDNode *N) {
18642 switch (N->getOpcode()) {
18643 case ISD::ADD:
18644 case ISD::MUL:
18645 case ISD::OR:
18646 case RISCVISD::ADD_VL:
18647 case RISCVISD::MUL_VL:
18648 case RISCVISD::OR_VL:
18649 case RISCVISD::FADD_VL:
18650 case RISCVISD::FMUL_VL:
18651 case RISCVISD::VFMADD_VL:
18652 case RISCVISD::VFNMSUB_VL:
18653 case RISCVISD::VFNMADD_VL:
18654 case RISCVISD::VFMSUB_VL:
18655 return true;
18656 case RISCVISD::VWADD_W_VL:
18657 case RISCVISD::VWADDU_W_VL:
18658 case ISD::SUB:
18659 case RISCVISD::SUB_VL:
18660 case RISCVISD::VWSUB_W_VL:
18661 case RISCVISD::VWSUBU_W_VL:
18662 case RISCVISD::VFWADD_W_VL:
18663 case RISCVISD::FSUB_VL:
18664 case RISCVISD::VFWSUB_W_VL:
18665 case ISD::SHL:
18666 case RISCVISD::SHL_VL:
18667 return false;
18668 default:
18669 llvm_unreachable("Unexpected opcode");
18670 }
18671 }
18672
18673 /// Get a list of combine to try for folding extensions in \p Root.
18674 /// Note that each returned CombineToTry function doesn't actually modify
18675 /// anything. Instead they produce an optional CombineResult that if not None,
18676 /// need to be materialized for the combine to be applied.
18677 /// \see CombineResult::materialize.
18678 /// If the related CombineToTry function returns std::nullopt, that means the
18679 /// combine didn't match.
18681 getSupportedFoldings(const SDNode *Root, const RISCVSubtarget &Subtarget);
18682};
18683
18684/// Helper structure that holds all the necessary information to materialize a
18685/// combine that does some extension folding.
18686struct CombineResult {
18687 /// Opcode to be generated when materializing the combine.
18688 unsigned TargetOpcode;
18689 // No value means no extension is needed.
18690 std::optional<ExtKind> LHSExt;
18691 std::optional<ExtKind> RHSExt;
18692 /// Root of the combine.
18693 SDNode *Root;
18694 /// LHS of the TargetOpcode.
18695 NodeExtensionHelper LHS;
18696 /// RHS of the TargetOpcode.
18697 NodeExtensionHelper RHS;
18698
18699 CombineResult(unsigned TargetOpcode, SDNode *Root,
18700 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
18701 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
18702 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
18703 LHS(LHS), RHS(RHS) {}
18704
18705 /// Return a value that uses TargetOpcode and that can be used to replace
18706 /// Root.
18707 /// The actual replacement is *not* done in that method.
18708 SDValue materialize(SelectionDAG &DAG,
18709 const RISCVSubtarget &Subtarget) const {
18710 SDValue Mask, VL, Passthru;
18711 std::tie(Mask, VL) =
18712 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
18713 switch (Root->getOpcode()) {
18714 default:
18715 Passthru = Root->getOperand(2);
18716 break;
18717 case ISD::ADD:
18718 case ISD::SUB:
18719 case ISD::MUL:
18720 case ISD::OR:
18721 case ISD::SHL:
18722 Passthru = DAG.getUNDEF(Root->getValueType(0));
18723 break;
18724 }
18725 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
18726 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
18727 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
18728 Passthru, Mask, VL);
18729 }
18730};
18731
18732/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18733/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18734/// are zext) and LHS and RHS can be folded into Root.
18735/// AllowExtMask define which form `ext` can take in this pattern.
18736///
18737/// \note If the pattern can match with both zext and sext, the returned
18738/// CombineResult will feature the zext result.
18739///
18740/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18741/// can be used to apply the pattern.
18742static std::optional<CombineResult>
18743canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
18744 const NodeExtensionHelper &RHS,
18745 uint8_t AllowExtMask, SelectionDAG &DAG,
18746 const RISCVSubtarget &Subtarget) {
18747 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
18748 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18749 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18750 /*RHSExt=*/{ExtKind::ZExt});
18751 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
18752 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18753 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18754 /*RHSExt=*/{ExtKind::SExt});
18755 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
18756 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18757 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18758 /*RHSExt=*/{ExtKind::FPExt});
18759 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
18760 RHS.SupportsBF16Ext)
18761 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18762 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
18763 /*RHSExt=*/{ExtKind::BF16Ext});
18764 return std::nullopt;
18765}
18766
18767/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18768/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18769/// are zext) and LHS and RHS can be folded into Root.
18770///
18771/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18772/// can be used to apply the pattern.
18773static std::optional<CombineResult>
18774canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
18775 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18776 const RISCVSubtarget &Subtarget) {
18777 return canFoldToVWWithSameExtensionImpl(
18778 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
18779 Subtarget);
18780}
18781
18782/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
18783///
18784/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18785/// can be used to apply the pattern.
18786static std::optional<CombineResult>
18787canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18788 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18789 const RISCVSubtarget &Subtarget) {
18790 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
18791 Subtarget);
18792}
18793
18794/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
18795///
18796/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18797/// can be used to apply the pattern.
18798static std::optional<CombineResult>
18799canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
18800 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18801 const RISCVSubtarget &Subtarget) {
18802 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
18803 Subtarget);
18804}
18805
18806/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
18807///
18808/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18809/// can be used to apply the pattern.
18810static std::optional<CombineResult>
18811canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
18812 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18813 const RISCVSubtarget &Subtarget) {
18814 if (RHS.SupportsFPExt)
18815 return CombineResult(
18816 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
18817 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
18818
18819 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
18820 // sext/zext?
18821 // Control this behavior behind an option (AllowSplatInVW_W) for testing
18822 // purposes.
18823 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
18824 return CombineResult(
18825 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
18826 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
18827 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
18828 return CombineResult(
18829 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
18830 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
18831 return std::nullopt;
18832}
18833
18834/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
18835///
18836/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18837/// can be used to apply the pattern.
18838static std::optional<CombineResult>
18839canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18840 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18841 const RISCVSubtarget &Subtarget) {
18842 if (LHS.SupportsSExt)
18843 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18844 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18845 /*RHSExt=*/std::nullopt);
18846 return std::nullopt;
18847}
18848
18849/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
18850///
18851/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18852/// can be used to apply the pattern.
18853static std::optional<CombineResult>
18854canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18855 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18856 const RISCVSubtarget &Subtarget) {
18857 if (LHS.SupportsZExt)
18858 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18859 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18860 /*RHSExt=*/std::nullopt);
18861 return std::nullopt;
18862}
18863
18864/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
18865///
18866/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18867/// can be used to apply the pattern.
18868static std::optional<CombineResult>
18869canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18870 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18871 const RISCVSubtarget &Subtarget) {
18872 if (LHS.SupportsFPExt)
18873 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18874 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18875 /*RHSExt=*/std::nullopt);
18876 return std::nullopt;
18877}
18878
18879/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
18880///
18881/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18882/// can be used to apply the pattern.
18883static std::optional<CombineResult>
18884canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
18885 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18886 const RISCVSubtarget &Subtarget) {
18887
18888 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
18889 return std::nullopt;
18890 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
18891 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18892 /*RHSExt=*/{ExtKind::ZExt});
18893}
18894
18896NodeExtensionHelper::getSupportedFoldings(const SDNode *Root,
18897 const RISCVSubtarget &Subtarget) {
18898 SmallVector<CombineToTry> Strategies;
18899 switch (Root->getOpcode()) {
18900 case ISD::ADD:
18901 case ISD::SUB:
18902 case ISD::OR:
18903 case RISCVISD::ADD_VL:
18904 case RISCVISD::SUB_VL:
18905 case RISCVISD::OR_VL:
18906 case RISCVISD::FADD_VL:
18907 case RISCVISD::FSUB_VL:
18908 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
18909 Strategies.push_back(canFoldToVWWithSameExtension);
18910 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
18911 Strategies.push_back(canFoldToVW_W);
18912 break;
18913 case RISCVISD::FMUL_VL:
18914 case RISCVISD::VFMADD_VL:
18915 case RISCVISD::VFMSUB_VL:
18916 case RISCVISD::VFNMADD_VL:
18917 case RISCVISD::VFNMSUB_VL:
18918 Strategies.push_back(canFoldToVWWithSameExtension);
18919 if (Subtarget.hasStdExtZvfbfa() && Root->getOpcode() != RISCVISD::FMUL_VL)
18920 // TODO: Once other widen operations are supported we can merge
18921 // canFoldToVWWithSameExtension and canFoldToVWWithSameExtBF16.
18922 Strategies.push_back(canFoldToVWWithSameExtBF16);
18923 else if (Subtarget.hasStdExtZvfbfwma() &&
18924 Root->getOpcode() == RISCVISD::VFMADD_VL)
18925 Strategies.push_back(canFoldToVWWithSameExtBF16);
18926 break;
18927 case ISD::MUL:
18928 case RISCVISD::MUL_VL:
18929 // mul -> vwmul(u)
18930 Strategies.push_back(canFoldToVWWithSameExtension);
18931 // mul -> vwmulsu
18932 Strategies.push_back(canFoldToVW_SU);
18933 break;
18934 case ISD::SHL:
18935 case RISCVISD::SHL_VL:
18936 // shl -> vwsll
18937 Strategies.push_back(canFoldToVWWithSameExtZEXT);
18938 break;
18939 case RISCVISD::VWADD_W_VL:
18940 case RISCVISD::VWSUB_W_VL:
18941 // vwadd_w|vwsub_w -> vwadd|vwsub
18942 Strategies.push_back(canFoldToVWWithSEXT);
18943 break;
18944 case RISCVISD::VWADDU_W_VL:
18945 case RISCVISD::VWSUBU_W_VL:
18946 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
18947 Strategies.push_back(canFoldToVWWithZEXT);
18948 break;
18949 case RISCVISD::VFWADD_W_VL:
18950 case RISCVISD::VFWSUB_W_VL:
18951 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
18952 Strategies.push_back(canFoldToVWWithFPEXT);
18953 break;
18954 default:
18955 llvm_unreachable("Unexpected opcode");
18956 }
18957 return Strategies;
18958}
18959} // End anonymous namespace.
18960
18962 // TODO: Extend this to other binops using generic identity logic
18963 assert(N->getOpcode() == RISCVISD::ADD_VL);
18964 SDValue A = N->getOperand(0);
18965 SDValue B = N->getOperand(1);
18966 SDValue Passthru = N->getOperand(2);
18967 if (!Passthru.isUndef())
18968 // TODO:This could be a vmerge instead
18969 return SDValue();
18970 ;
18972 return A;
18973 // Peek through fixed to scalable
18974 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
18975 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
18976 return A;
18977 return SDValue();
18978}
18979
18980/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
18981/// The supported combines are:
18982/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
18983/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18984/// mul | mul_vl -> vwmul(u) | vwmul_su
18985/// shl | shl_vl -> vwsll
18986/// fadd_vl -> vfwadd | vfwadd_w
18987/// fsub_vl -> vfwsub | vfwsub_w
18988/// fmul_vl -> vfwmul
18989/// vwadd_w(u) -> vwadd(u)
18990/// vwsub_w(u) -> vwsub(u)
18991/// vfwadd_w -> vfwadd
18992/// vfwsub_w -> vfwsub
18995 const RISCVSubtarget &Subtarget) {
18996 SelectionDAG &DAG = DCI.DAG;
18997 if (DCI.isBeforeLegalize())
18998 return SDValue();
18999
19000 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
19001 return SDValue();
19002
19003 SmallVector<SDNode *> Worklist;
19004 SmallPtrSet<SDNode *, 8> Inserted;
19005 SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
19006 Worklist.push_back(N);
19007 Inserted.insert(N);
19008 SmallVector<CombineResult> CombinesToApply;
19009
19010 while (!Worklist.empty()) {
19011 SDNode *Root = Worklist.pop_back_val();
19012
19013 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
19014 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
19015 auto AppendUsersIfNeeded =
19016 [&Worklist, &Subtarget, &Inserted,
19017 &ExtensionsToRemove](const NodeExtensionHelper &Op) {
19018 if (Op.needToPromoteOtherUsers()) {
19019 // Remember that we're supposed to remove this extension.
19020 ExtensionsToRemove.insert(Op.OrigOperand.getNode());
19021 for (SDUse &Use : Op.OrigOperand->uses()) {
19022 SDNode *TheUser = Use.getUser();
19023 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
19024 return false;
19025 // We only support the first 2 operands of FMA.
19026 if (Use.getOperandNo() >= 2)
19027 return false;
19028 if (Inserted.insert(TheUser).second)
19029 Worklist.push_back(TheUser);
19030 }
19031 }
19032 return true;
19033 };
19034
19035 // Control the compile time by limiting the number of node we look at in
19036 // total.
19037 if (Inserted.size() > ExtensionMaxWebSize)
19038 return SDValue();
19039
19041 NodeExtensionHelper::getSupportedFoldings(Root, Subtarget);
19042
19043 assert(!FoldingStrategies.empty() && "Nothing to be folded");
19044 bool Matched = false;
19045 for (int Attempt = 0;
19046 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
19047 ++Attempt) {
19048
19049 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
19050 FoldingStrategies) {
19051 std::optional<CombineResult> Res =
19052 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
19053 if (Res) {
19054 // If this strategy wouldn't remove an extension we're supposed to
19055 // remove, reject it.
19056 if (!Res->LHSExt.has_value() &&
19057 ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
19058 continue;
19059 if (!Res->RHSExt.has_value() &&
19060 ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
19061 continue;
19062
19063 Matched = true;
19064 CombinesToApply.push_back(*Res);
19065 // All the inputs that are extended need to be folded, otherwise
19066 // we would be leaving the old input (since it is may still be used),
19067 // and the new one.
19068 if (Res->LHSExt.has_value())
19069 if (!AppendUsersIfNeeded(LHS))
19070 return SDValue();
19071 if (Res->RHSExt.has_value())
19072 if (!AppendUsersIfNeeded(RHS))
19073 return SDValue();
19074 break;
19075 }
19076 }
19077 std::swap(LHS, RHS);
19078 }
19079 // Right now we do an all or nothing approach.
19080 if (!Matched)
19081 return SDValue();
19082 }
19083 // Store the value for the replacement of the input node separately.
19084 SDValue InputRootReplacement;
19085 // We do the RAUW after we materialize all the combines, because some replaced
19086 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
19087 // some of these nodes may appear in the NodeExtensionHelpers of some of the
19088 // yet-to-be-visited CombinesToApply roots.
19090 ValuesToReplace.reserve(CombinesToApply.size());
19091 for (CombineResult Res : CombinesToApply) {
19092 SDValue NewValue = Res.materialize(DAG, Subtarget);
19093 if (!InputRootReplacement) {
19094 assert(Res.Root == N &&
19095 "First element is expected to be the current node");
19096 InputRootReplacement = NewValue;
19097 } else {
19098 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
19099 }
19100 }
19101 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
19102 DCI.CombineTo(OldNewValues.first.getNode(), OldNewValues.second);
19103 }
19104 return InputRootReplacement;
19105}
19106
19107// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
19108// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
19109// y will be the Passthru and cond will be the Mask.
19111 unsigned Opc = N->getOpcode();
19112 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
19113 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
19114
19115 SDValue Y = N->getOperand(0);
19116 SDValue MergeOp = N->getOperand(1);
19117 unsigned MergeOpc = MergeOp.getOpcode();
19118
19119 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
19120 return SDValue();
19121
19122 SDValue X = MergeOp->getOperand(1);
19123
19124 if (!MergeOp.hasOneUse())
19125 return SDValue();
19126
19127 // Passthru should be undef
19128 SDValue Passthru = N->getOperand(2);
19129 if (!Passthru.isUndef())
19130 return SDValue();
19131
19132 // Mask should be all ones
19133 SDValue Mask = N->getOperand(3);
19134 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
19135 return SDValue();
19136
19137 // False value of MergeOp should be all zeros
19138 SDValue Z = MergeOp->getOperand(2);
19139
19140 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
19141 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
19142 Z = Z.getOperand(1);
19143
19144 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
19145 return SDValue();
19146
19147 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
19148 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
19149 N->getFlags());
19150}
19151
19152// vwaddu C (vabd A B) -> vwabda(A B C)
19153// vwaddu C (vabdu A B) -> vwabdau(A B C)
19155 const RISCVSubtarget &Subtarget) {
19156 if (!Subtarget.hasStdExtZvabd())
19157 return SDValue();
19158
19159 MVT VT = N->getSimpleValueType(0);
19160 if (VT.getVectorElementType() != MVT::i8 &&
19161 VT.getVectorElementType() != MVT::i16)
19162 return SDValue();
19163
19164 SDValue Op0 = N->getOperand(0);
19165 SDValue Op1 = N->getOperand(1);
19166 SDValue Passthru = N->getOperand(2);
19167 if (!Passthru->isUndef())
19168 return SDValue();
19169
19170 SDValue Mask = N->getOperand(3);
19171 SDValue VL = N->getOperand(4);
19172 auto IsABD = [](SDValue Op) {
19173 if (Op->getOpcode() != RISCVISD::ABDS_VL &&
19174 Op->getOpcode() != RISCVISD::ABDU_VL)
19175 return SDValue();
19176 return Op;
19177 };
19178
19179 SDValue Diff = IsABD(Op0);
19180 Diff = Diff ? Diff : IsABD(Op1);
19181 if (!Diff)
19182 return SDValue();
19183 SDValue Acc = Diff == Op0 ? Op1 : Op0;
19184
19185 SDLoc DL(N);
19186 Acc = DAG.getNode(RISCVISD::VZEXT_VL, DL, VT, Acc, Mask, VL);
19187 SDValue Result = DAG.getNode(
19188 Diff.getOpcode() == RISCVISD::ABDS_VL ? RISCVISD::VWABDA_VL
19189 : RISCVISD::VWABDAU_VL,
19190 DL, VT, Diff.getOperand(0), Diff.getOperand(1), Acc, Mask, VL);
19191 return Result;
19192}
19193
19196 const RISCVSubtarget &Subtarget) {
19197 [[maybe_unused]] unsigned Opc = N->getOpcode();
19198 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
19199 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
19200
19201 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19202 return V;
19203
19204 return combineVWADDSUBWSelect(N, DCI.DAG);
19205}
19206
19207// Helper function for performMemPairCombine.
19208// Try to combine the memory loads/stores LSNode1 and LSNode2
19209// into a single memory pair operation.
19211 LSBaseSDNode *LSNode2, SDValue BasePtr,
19212 uint64_t Imm) {
19214 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
19215
19216 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
19217 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
19218 return SDValue();
19219
19221 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
19222
19223 // The new operation has twice the width.
19224 MVT XLenVT = Subtarget.getXLenVT();
19225 EVT MemVT = LSNode1->getMemoryVT();
19226 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
19227 MachineMemOperand *MMO = LSNode1->getMemOperand();
19229 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
19230
19231 if (LSNode1->getOpcode() == ISD::LOAD) {
19232 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
19233 unsigned Opcode;
19234 if (MemVT == MVT::i32)
19235 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
19236 else
19237 Opcode = RISCVISD::TH_LDD;
19238
19239 SDValue Res = DAG.getMemIntrinsicNode(
19240 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
19241 {LSNode1->getChain(), BasePtr,
19242 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
19243 NewMemVT, NewMMO);
19244
19245 SDValue Node1 =
19246 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
19247 SDValue Node2 =
19248 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
19249
19250 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
19251 return Node1;
19252 } else {
19253 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
19254
19255 SDValue Res = DAG.getMemIntrinsicNode(
19256 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
19257 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
19258 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
19259 NewMemVT, NewMMO);
19260
19261 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
19262 return Res;
19263 }
19264}
19265
19266// Try to combine two adjacent loads/stores to a single pair instruction from
19267// the XTHeadMemPair vendor extension.
19270 SelectionDAG &DAG = DCI.DAG;
19272 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
19273
19274 // Target does not support load/store pair.
19275 if (!Subtarget.hasVendorXTHeadMemPair())
19276 return SDValue();
19277
19278 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
19279 EVT MemVT = LSNode1->getMemoryVT();
19280 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
19281
19282 // No volatile, indexed or atomic loads/stores.
19283 if (!LSNode1->isSimple() || LSNode1->isIndexed())
19284 return SDValue();
19285
19286 // Function to get a base + constant representation from a memory value.
19287 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
19288 if (Ptr->getOpcode() == ISD::ADD)
19289 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
19290 return {Ptr->getOperand(0), C1->getZExtValue()};
19291 return {Ptr, 0};
19292 };
19293
19294 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
19295
19296 SDValue Chain = N->getOperand(0);
19297 for (SDUse &Use : Chain->uses()) {
19298 if (Use.getUser() != N && Use.getResNo() == 0 &&
19299 Use.getUser()->getOpcode() == N->getOpcode()) {
19301
19302 // No volatile, indexed or atomic loads/stores.
19303 if (!LSNode2->isSimple() || LSNode2->isIndexed())
19304 continue;
19305
19306 // Check if LSNode1 and LSNode2 have the same type and extension.
19307 if (LSNode1->getOpcode() == ISD::LOAD)
19308 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
19310 continue;
19311
19312 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
19313 continue;
19314
19315 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
19316
19317 // Check if the base pointer is the same for both instruction.
19318 if (Base1 != Base2)
19319 continue;
19320
19321 // Check if the offsets match the XTHeadMemPair encoding constraints.
19322 bool Valid = false;
19323 if (MemVT == MVT::i32) {
19324 // Check for adjacent i32 values and a 2-bit index.
19325 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
19326 Valid = true;
19327 } else if (MemVT == MVT::i64) {
19328 // Check for adjacent i64 values and a 2-bit index.
19329 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
19330 Valid = true;
19331 }
19332
19333 if (!Valid)
19334 continue;
19335
19336 // Try to combine.
19337 if (SDValue Res =
19338 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
19339 return Res;
19340 }
19341 }
19342
19343 return SDValue();
19344}
19345
19346// Fold
19347// (fp_to_int (froundeven X)) -> fcvt X, rne
19348// (fp_to_int (ftrunc X)) -> fcvt X, rtz
19349// (fp_to_int (ffloor X)) -> fcvt X, rdn
19350// (fp_to_int (fceil X)) -> fcvt X, rup
19351// (fp_to_int (fround X)) -> fcvt X, rmm
19352// (fp_to_int (frint X)) -> fcvt X
19355 const RISCVSubtarget &Subtarget) {
19356 SelectionDAG &DAG = DCI.DAG;
19357 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19358 MVT XLenVT = Subtarget.getXLenVT();
19359
19360 SDValue Src = N->getOperand(0);
19361
19362 // Don't do this for strict-fp Src.
19363 if (Src->isStrictFPOpcode())
19364 return SDValue();
19365
19366 // Ensure the FP type is legal.
19367 if (!TLI.isTypeLegal(Src.getValueType()))
19368 return SDValue();
19369
19370 // Don't do this for f16 with Zfhmin and not Zfh.
19371 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
19372 return SDValue();
19373
19374 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
19375 // If the result is invalid, we didn't find a foldable instruction.
19376 if (FRM == RISCVFPRndMode::Invalid)
19377 return SDValue();
19378
19379 SDLoc DL(N);
19380 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
19381 EVT VT = N->getValueType(0);
19382
19383 if (VT.isVector() && TLI.isTypeLegal(VT)) {
19384 MVT SrcVT = Src.getSimpleValueType();
19385 MVT SrcContainerVT = SrcVT;
19386 MVT ContainerVT = VT.getSimpleVT();
19387 SDValue XVal = Src.getOperand(0);
19388
19389 // For widening and narrowing conversions we just combine it into a
19390 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
19391 // end up getting lowered to their appropriate pseudo instructions based on
19392 // their operand types
19393 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
19394 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
19395 return SDValue();
19396
19397 // Make fixed-length vectors scalable first
19398 if (SrcVT.isFixedLengthVector()) {
19399 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
19400 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
19401 ContainerVT =
19402 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
19403 }
19404
19405 auto [Mask, VL] =
19406 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
19407
19408 SDValue FpToInt;
19409 if (FRM == RISCVFPRndMode::RTZ) {
19410 // Use the dedicated trunc static rounding mode if we're truncating so we
19411 // don't need to generate calls to fsrmi/fsrm
19412 unsigned Opc =
19413 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
19414 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
19415 } else {
19416 unsigned Opc =
19417 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
19418 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
19419 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
19420 }
19421
19422 // If converted from fixed-length to scalable, convert back
19423 if (VT.isFixedLengthVector())
19424 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
19425
19426 return FpToInt;
19427 }
19428
19429 // Only handle XLen or i32 types. Other types narrower than XLen will
19430 // eventually be legalized to XLenVT.
19431 if (VT != MVT::i32 && VT != XLenVT)
19432 return SDValue();
19433
19434 unsigned Opc;
19435 if (VT == XLenVT)
19436 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
19437 else
19438 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
19439
19440 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
19441 DAG.getTargetConstant(FRM, DL, XLenVT));
19442 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
19443}
19444
19445// Fold
19446// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
19447// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
19448// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
19449// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
19450// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
19451// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
19454 const RISCVSubtarget &Subtarget) {
19455 SelectionDAG &DAG = DCI.DAG;
19456 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19457 MVT XLenVT = Subtarget.getXLenVT();
19458
19459 // Only handle XLen types. Other types narrower than XLen will eventually be
19460 // legalized to XLenVT.
19461 EVT DstVT = N->getValueType(0);
19462 if (DstVT != XLenVT)
19463 return SDValue();
19464
19465 SDValue Src = N->getOperand(0);
19466
19467 // Don't do this for strict-fp Src.
19468 if (Src->isStrictFPOpcode())
19469 return SDValue();
19470
19471 // Ensure the FP type is also legal.
19472 if (!TLI.isTypeLegal(Src.getValueType()))
19473 return SDValue();
19474
19475 // Don't do this for f16 with Zfhmin and not Zfh.
19476 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
19477 return SDValue();
19478
19479 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19480
19481 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
19482 if (FRM == RISCVFPRndMode::Invalid)
19483 return SDValue();
19484
19485 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
19486
19487 unsigned Opc;
19488 if (SatVT == DstVT)
19489 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
19490 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
19491 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
19492 else
19493 return SDValue();
19494 // FIXME: Support other SatVTs by clamping before or after the conversion.
19495
19496 Src = Src.getOperand(0);
19497
19498 SDLoc DL(N);
19499 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
19500 DAG.getTargetConstant(FRM, DL, XLenVT));
19501
19502 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
19503 // extend.
19504 if (Opc == RISCVISD::FCVT_WU_RV64)
19505 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
19506
19507 // RISC-V FP-to-int conversions saturate to the destination register size, but
19508 // don't produce 0 for nan.
19509 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
19510 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
19511}
19512
19513// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
19514// smaller than XLenVT.
19516 const RISCVSubtarget &Subtarget) {
19517 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
19518
19519 SDValue Src = N->getOperand(0);
19520 if (Src.getOpcode() != ISD::BSWAP)
19521 return SDValue();
19522
19523 EVT VT = N->getValueType(0);
19524 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
19526 return SDValue();
19527
19528 SDLoc DL(N);
19529 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
19530}
19531
19533 const RISCVSubtarget &Subtarget) {
19534 // Fold:
19535 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
19536
19537 // Check if its first operand is a vp.load.
19538 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
19539 if (!VPLoad)
19540 return SDValue();
19541
19542 EVT LoadVT = VPLoad->getValueType(0);
19543 // We do not have a strided_load version for masks, and the evl of vp.reverse
19544 // and vp.load should always be the same.
19545 if (!LoadVT.getVectorElementType().isByteSized() ||
19546 N->getOperand(2) != VPLoad->getVectorLength() ||
19547 !N->getOperand(0).hasOneUse())
19548 return SDValue();
19549
19550 SDValue LoadMask = VPLoad->getMask();
19551 // If Mask is all ones, then load is unmasked and can be reversed.
19552 if (!isOneOrOneSplat(LoadMask)) {
19553 // If the mask is not all ones, we can reverse the load if the mask was also
19554 // reversed by a vp.reverse with the same EVL.
19555 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
19556 LoadMask.getOperand(2) != VPLoad->getVectorLength())
19557 return SDValue();
19558 LoadMask = LoadMask.getOperand(0);
19559 }
19560
19561 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
19562 SDLoc DL(N);
19563 MVT XLenVT = Subtarget.getXLenVT();
19564 SDValue NumElem = VPLoad->getVectorLength();
19565 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
19566
19567 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
19568 DAG.getConstant(1, DL, XLenVT));
19569 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
19570 DAG.getConstant(ElemWidthByte, DL, XLenVT));
19571 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
19572 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
19573
19575 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
19577 PtrInfo, VPLoad->getMemOperand()->getFlags(),
19578 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
19579
19580 SDValue Ret = DAG.getStridedLoadVP(
19581 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
19582 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
19583
19584 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
19585
19586 return Ret;
19587}
19588
19590 const RISCVSubtarget &Subtarget) {
19591 // Fold:
19592 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
19593 // -1, MASK)
19594 auto *VPStore = cast<VPStoreSDNode>(N);
19595
19596 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
19597 return SDValue();
19598
19599 SDValue VPReverse = VPStore->getValue();
19600 EVT ReverseVT = VPReverse->getValueType(0);
19601
19602 // We do not have a strided_store version for masks, and the evl of vp.reverse
19603 // and vp.store should always be the same.
19604 if (!ReverseVT.getVectorElementType().isByteSized() ||
19605 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
19606 !VPReverse.hasOneUse())
19607 return SDValue();
19608
19609 SDValue StoreMask = VPStore->getMask();
19610 // If Mask is all ones, then load is unmasked and can be reversed.
19611 if (!isOneOrOneSplat(StoreMask)) {
19612 // If the mask is not all ones, we can reverse the store if the mask was
19613 // also reversed by a vp.reverse with the same EVL.
19614 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
19615 StoreMask.getOperand(2) != VPStore->getVectorLength())
19616 return SDValue();
19617 StoreMask = StoreMask.getOperand(0);
19618 }
19619
19620 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
19621 SDLoc DL(N);
19622 MVT XLenVT = Subtarget.getXLenVT();
19623 SDValue NumElem = VPStore->getVectorLength();
19624 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
19625
19626 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
19627 DAG.getConstant(1, DL, XLenVT));
19628 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
19629 DAG.getConstant(ElemWidthByte, DL, XLenVT));
19630 SDValue Base =
19631 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
19632 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
19633
19635 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
19637 PtrInfo, VPStore->getMemOperand()->getFlags(),
19638 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
19639
19640 return DAG.getStridedStoreVP(
19641 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
19642 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
19643 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
19644 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
19645}
19646
19647// Peephole avgceil pattern.
19648// %1 = zext <N x i8> %a to <N x i32>
19649// %2 = zext <N x i8> %b to <N x i32>
19650// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
19651// %4 = add nuw nsw <N x i32> %3, %2
19652// %5 = lshr <N x i32> %4, splat (i32 1)
19653// %6 = trunc <N x i32> %5 to <N x i8>
19655 const RISCVSubtarget &Subtarget) {
19656 EVT VT = N->getValueType(0);
19657
19658 // Ignore fixed vectors.
19659 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19660 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
19661 return SDValue();
19662
19663 SDValue In = N->getOperand(0);
19664 SDValue Mask = N->getOperand(1);
19665 SDValue VL = N->getOperand(2);
19666
19667 // Input should be a vp_srl with same mask and VL.
19668 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
19669 In.getOperand(3) != VL)
19670 return SDValue();
19671
19672 // Shift amount should be 1.
19673 if (!isOneOrOneSplat(In.getOperand(1)))
19674 return SDValue();
19675
19676 // Shifted value should be a vp_add with same mask and VL.
19677 SDValue LHS = In.getOperand(0);
19678 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
19679 LHS.getOperand(3) != VL)
19680 return SDValue();
19681
19682 SDValue Operands[3];
19683
19684 // Matches another VP_ADD with same VL and Mask.
19685 auto FindAdd = [&](SDValue V, SDValue Other) {
19686 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
19687 V.getOperand(3) != VL)
19688 return false;
19689
19690 Operands[0] = Other;
19691 Operands[1] = V.getOperand(1);
19692 Operands[2] = V.getOperand(0);
19693 return true;
19694 };
19695
19696 // We need to find another VP_ADD in one of the operands.
19697 SDValue LHS0 = LHS.getOperand(0);
19698 SDValue LHS1 = LHS.getOperand(1);
19699 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
19700 return SDValue();
19701
19702 // Now we have three operands of two additions. Check that one of them is a
19703 // constant vector with ones.
19704 auto I = llvm::find_if(Operands,
19705 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
19706 if (I == std::end(Operands))
19707 return SDValue();
19708 // We found a vector with ones, move if it to the end of the Operands array.
19709 std::swap(*I, Operands[2]);
19710
19711 // Make sure the other 2 operands can be promoted from the result type.
19712 for (SDValue Op : drop_end(Operands)) {
19713 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
19714 Op.getOperand(2) != VL)
19715 return SDValue();
19716 // Input must be the same size or smaller than our result.
19717 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
19718 return SDValue();
19719 }
19720
19721 // Pattern is detected.
19722 // Rebuild the zero extends in case the inputs are smaller than our result.
19723 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
19724 Operands[0].getOperand(0), Mask, VL);
19725 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
19726 Operands[1].getOperand(0), Mask, VL);
19727 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
19728 // mode.
19729 SDLoc DL(N);
19730 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
19731 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
19732}
19733
19734// Convert from one FMA opcode to another based on whether we are negating the
19735// multiply result and/or the accumulator.
19736// NOTE: Only supports RVV operations with VL.
19737static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
19738 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
19739 if (NegMul) {
19740 // clang-format off
19741 switch (Opcode) {
19742 default: llvm_unreachable("Unexpected opcode");
19743 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19744 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19745 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19746 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19747 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19748 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19749 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19750 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19751 }
19752 // clang-format on
19753 }
19754
19755 // Negating the accumulator changes ADD<->SUB.
19756 if (NegAcc) {
19757 // clang-format off
19758 switch (Opcode) {
19759 default: llvm_unreachable("Unexpected opcode");
19760 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19761 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19762 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19763 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19764 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19765 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19766 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19767 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19768 }
19769 // clang-format on
19770 }
19771
19772 return Opcode;
19773}
19774
19776 // Fold FNEG_VL into FMA opcodes.
19777 // The first operand of strict-fp is chain.
19778 bool IsStrict =
19779 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
19780 unsigned Offset = IsStrict ? 1 : 0;
19781 SDValue A = N->getOperand(0 + Offset);
19782 SDValue B = N->getOperand(1 + Offset);
19783 SDValue C = N->getOperand(2 + Offset);
19784 SDValue Mask = N->getOperand(3 + Offset);
19785 SDValue VL = N->getOperand(4 + Offset);
19786
19787 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
19788 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
19789 V.getOperand(2) == VL) {
19790 // Return the negated input.
19791 V = V.getOperand(0);
19792 return true;
19793 }
19794
19795 return false;
19796 };
19797
19798 bool NegA = invertIfNegative(A);
19799 bool NegB = invertIfNegative(B);
19800 bool NegC = invertIfNegative(C);
19801
19802 // If no operands are negated, we're done.
19803 if (!NegA && !NegB && !NegC)
19804 return SDValue();
19805
19806 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
19807 if (IsStrict)
19808 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
19809 {N->getOperand(0), A, B, C, Mask, VL});
19810 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
19811 VL);
19812}
19813
19816 const RISCVSubtarget &Subtarget) {
19817 SelectionDAG &DAG = DCI.DAG;
19818
19820 return V;
19821
19822 // FIXME: Ignore strict opcodes for now.
19823 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
19824 return SDValue();
19825
19826 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
19827}
19828
19830 const RISCVSubtarget &Subtarget) {
19831 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
19832
19833 EVT VT = N->getValueType(0);
19834
19835 if (VT != Subtarget.getXLenVT())
19836 return SDValue();
19837
19838 if (!isa<ConstantSDNode>(N->getOperand(1)))
19839 return SDValue();
19840 uint64_t ShAmt = N->getConstantOperandVal(1);
19841
19842 SDValue N0 = N->getOperand(0);
19843
19844 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
19845 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
19846 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
19847 unsigned ExtSize =
19848 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
19849 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
19850 N0.getOperand(0).hasOneUse() &&
19852 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
19853 if (LShAmt < ExtSize) {
19854 unsigned Size = VT.getSizeInBits();
19855 SDLoc ShlDL(N0.getOperand(0));
19856 SDValue Shl =
19857 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
19858 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
19859 SDLoc DL(N);
19860 return DAG.getNode(ISD::SRA, DL, VT, Shl,
19861 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
19862 }
19863 }
19864 }
19865
19866 if (ShAmt > 32 || VT != MVT::i64)
19867 return SDValue();
19868
19869 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
19870 // FIXME: Should this be a generic combine? There's a similar combine on X86.
19871 //
19872 // Also try these folds where an add or sub is in the middle.
19873 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
19874 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
19875 SDValue Shl;
19876 ConstantSDNode *AddC = nullptr;
19877
19878 // We might have an ADD or SUB between the SRA and SHL.
19879 bool IsAdd = N0.getOpcode() == ISD::ADD;
19880 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
19881 // Other operand needs to be a constant we can modify.
19882 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
19883 if (!AddC)
19884 return SDValue();
19885
19886 // AddC needs to have at least 32 trailing zeros.
19887 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
19888 return SDValue();
19889
19890 // All users should be a shift by constant less than or equal to 32. This
19891 // ensures we'll do this optimization for each of them to produce an
19892 // add/sub+sext_inreg they can all share.
19893 for (SDNode *U : N0->users()) {
19894 if (U->getOpcode() != ISD::SRA ||
19895 !isa<ConstantSDNode>(U->getOperand(1)) ||
19896 U->getConstantOperandVal(1) > 32)
19897 return SDValue();
19898 }
19899
19900 Shl = N0.getOperand(IsAdd ? 0 : 1);
19901 } else {
19902 // Not an ADD or SUB.
19903 Shl = N0;
19904 }
19905
19906 // Look for a shift left by 32.
19907 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
19908 Shl.getConstantOperandVal(1) != 32)
19909 return SDValue();
19910
19911 // We if we didn't look through an add/sub, then the shl should have one use.
19912 // If we did look through an add/sub, the sext_inreg we create is free so
19913 // we're only creating 2 new instructions. It's enough to only remove the
19914 // original sra+add/sub.
19915 if (!AddC && !Shl.hasOneUse())
19916 return SDValue();
19917
19918 SDLoc DL(N);
19919 SDValue In = Shl.getOperand(0);
19920
19921 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
19922 // constant.
19923 if (AddC) {
19924 SDValue ShiftedAddC =
19925 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
19926 if (IsAdd)
19927 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
19928 else
19929 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
19930 }
19931
19932 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
19933 DAG.getValueType(MVT::i32));
19934 if (ShAmt == 32)
19935 return SExt;
19936
19937 return DAG.getNode(
19938 ISD::SHL, DL, MVT::i64, SExt,
19939 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
19940}
19941
19942// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
19943// the result is used as the condition of a br_cc or select_cc we can invert,
19944// inverting the setcc is free, and Z is 0/1. Caller will invert the
19945// br_cc/select_cc.
19947 bool IsAnd = Cond.getOpcode() == ISD::AND;
19948 if (!IsAnd && Cond.getOpcode() != ISD::OR)
19949 return SDValue();
19950
19951 if (!Cond.hasOneUse())
19952 return SDValue();
19953
19954 SDValue Setcc = Cond.getOperand(0);
19955 SDValue Xor = Cond.getOperand(1);
19956 // Canonicalize setcc to LHS.
19957 if (Setcc.getOpcode() != ISD::SETCC)
19958 std::swap(Setcc, Xor);
19959 // LHS should be a setcc and RHS should be an xor.
19960 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
19961 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
19962 return SDValue();
19963
19964 // If the condition is an And, SimplifyDemandedBits may have changed
19965 // (xor Z, 1) to (not Z).
19966 SDValue Xor1 = Xor.getOperand(1);
19967 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
19968 return SDValue();
19969
19970 EVT VT = Cond.getValueType();
19971 SDValue Xor0 = Xor.getOperand(0);
19972
19973 // The LHS of the xor needs to be 0/1.
19975 if (!DAG.MaskedValueIsZero(Xor0, Mask))
19976 return SDValue();
19977
19978 // We can only invert integer setccs.
19979 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
19980 if (!SetCCOpVT.isScalarInteger())
19981 return SDValue();
19982
19983 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
19984 if (ISD::isIntEqualitySetCC(CCVal)) {
19985 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
19986 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
19987 Setcc.getOperand(1), CCVal);
19988 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
19989 // Invert (setlt 0, X) by converting to (setlt X, 1).
19990 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
19991 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
19992 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
19993 // (setlt X, 1) by converting to (setlt 0, X).
19994 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
19995 DAG.getConstant(0, SDLoc(Setcc), VT),
19996 Setcc.getOperand(0), CCVal);
19997 } else
19998 return SDValue();
19999
20000 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
20001 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
20002}
20003
// Perform common combines for BR_CC and SELECT_CC conditions.
// LHS/RHS/CC are the comparison operands and condition code of the user node;
// they are rewritten in place. Returns true if any rewrite was applied, in
// which case the caller rebuilds the br_cc/select_cc from the updated values.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // Everything below only applies to eq/ne compares.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // If XOR is reused and has an immediate that will fit in XORI,
  // do not fold.
  auto isXorImmediate = [](const SDValue &Op) -> bool {
    if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
      return isInt<12>(XorCnst->getSExtValue());
    return false;
  };
  // Fold (X(i1) ^ 1) == 0 -> X != 0
  auto singleBitOp = [&DAG](const SDValue &VarOp,
                            const SDValue &ConstOp) -> bool {
    if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
      // VarOp must be provably 0/1 (all bits above bit 0 known zero).
      const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
      return (XorCnst->getSExtValue() == 1) &&
             DAG.MaskedValueIsZero(VarOp, Mask);
    }
    return false;
  };
  // The xor result feeding nothing but select_cc/br_cc users can always be
  // folded away, even if the immediate would fit XORI.
  auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
    for (const SDNode *UserNode : Op->users()) {
      const unsigned Opcode = UserNode->getOpcode();
      if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
        return false;
    }
    return true;
  };
  auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
                             const SDValue &LHS, const SDValue &RHS) -> bool {
    return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
           (!isXorImmediate(LHS.getOperand(1)) ||
            singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
            onlyUsedBySelectOrBR(LHS));
  };
  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (isFoldableXorEq(LHS, RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }
  // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext(X), C, eq/ne)
  if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    const SDValue LHS0 = LHS.getOperand(0);
    if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
      // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
      RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
                        LHS0.getOperand(1), LHS.getOperand(1));
      LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
                        LHS0.getOperand(0), LHS.getOperand(1));
      return true;
    }
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        // XAndesPerf supports branch on test bit.
        if (Subtarget.hasVendorXAndesPerf()) {
          LHS =
              DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(Mask, DL, LHS.getValueType()));
          return true;
        }

        // Move the tested bit into the sign bit and branch on sign instead.
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  // Try to push the inversion through an and/or of a setcc and a 0/1 value
  // (De Morgan); the inverted condition code compensates.
  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
20144
20145// Fold
20146// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
20147// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
20148// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
20149// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
20150// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
20151// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
20153 SDValue TrueVal, SDValue FalseVal,
20154 bool Swapped) {
20155 bool Commutative = true;
20156 unsigned Opc = TrueVal.getOpcode();
20157 switch (Opc) {
20158 default:
20159 return SDValue();
20160 case ISD::SHL:
20161 case ISD::SRA:
20162 case ISD::SRL:
20163 case ISD::SUB:
20164 case ISD::ROTL:
20165 case ISD::ROTR:
20166 Commutative = false;
20167 break;
20168 case ISD::ADD:
20169 case ISD::OR:
20170 case ISD::XOR:
20171 case ISD::UMIN:
20172 case ISD::UMAX:
20173 break;
20174 }
20175
20176 if (!TrueVal.hasOneUse())
20177 return SDValue();
20178
20179 unsigned OpToFold;
20180 if (FalseVal == TrueVal.getOperand(0))
20181 OpToFold = 0;
20182 else if (Commutative && FalseVal == TrueVal.getOperand(1))
20183 OpToFold = 1;
20184 else
20185 return SDValue();
20186
20187 EVT VT = N->getValueType(0);
20188 SDLoc DL(N);
20189 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
20190 EVT OtherOpVT = OtherOp.getValueType();
20191 SDValue IdentityOperand =
20192 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
20193 if (!Commutative)
20194 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
20195 assert(IdentityOperand && "No identity operand!");
20196
20197 if (Swapped)
20198 std::swap(OtherOp, IdentityOperand);
20199 SDValue NewSel =
20200 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
20201 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
20202}
20203
20204// This tries to get rid of `select` and `icmp` that are being used to handle
20205// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
20207 SDValue Cond = N->getOperand(0);
20208
20209 // This represents either CTTZ or CTLZ instruction.
20210 SDValue CountZeroes;
20211
20212 SDValue ValOnZero;
20213
20214 if (Cond.getOpcode() != ISD::SETCC)
20215 return SDValue();
20216
20217 if (!isNullConstant(Cond->getOperand(1)))
20218 return SDValue();
20219
20220 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
20221 if (CCVal == ISD::CondCode::SETEQ) {
20222 CountZeroes = N->getOperand(2);
20223 ValOnZero = N->getOperand(1);
20224 } else if (CCVal == ISD::CondCode::SETNE) {
20225 CountZeroes = N->getOperand(1);
20226 ValOnZero = N->getOperand(2);
20227 } else {
20228 return SDValue();
20229 }
20230
20231 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
20232 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
20233 CountZeroes = CountZeroes.getOperand(0);
20234
20235 if (CountZeroes.getOpcode() != ISD::CTTZ &&
20236 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
20237 CountZeroes.getOpcode() != ISD::CTLZ &&
20238 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
20239 return SDValue();
20240
20241 if (!isNullConstant(ValOnZero))
20242 return SDValue();
20243
20244 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
20245 if (Cond->getOperand(0) != CountZeroesArgument)
20246 return SDValue();
20247
20248 unsigned BitWidth = CountZeroes.getValueSizeInBits();
20249 if (!isPowerOf2_32(BitWidth))
20250 return SDValue();
20251
20252 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
20253 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
20254 CountZeroes.getValueType(), CountZeroesArgument);
20255 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
20256 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
20257 CountZeroes.getValueType(), CountZeroesArgument);
20258 }
20259
20260 SDValue BitWidthMinusOne =
20261 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
20262
20263 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
20264 CountZeroes, BitWidthMinusOne);
20265 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
20266}
20267
20269 const RISCVSubtarget &Subtarget) {
20270 SDValue Cond = N->getOperand(0);
20271 SDValue True = N->getOperand(1);
20272 SDValue False = N->getOperand(2);
20273 SDLoc DL(N);
20274 EVT VT = N->getValueType(0);
20275 EVT CondVT = Cond.getValueType();
20276
20277 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
20278 return SDValue();
20279
20280 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
20281 // BEXTI, where C is power of 2.
20282 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
20283 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
20284 SDValue LHS = Cond.getOperand(0);
20285 SDValue RHS = Cond.getOperand(1);
20286 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20287 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
20288 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
20289 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
20290 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
20291 return DAG.getSelect(DL, VT,
20292 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
20293 False, True);
20294 }
20295 }
20296 return SDValue();
20297}
20298
20299static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
20300 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
20301 return false;
20302
20303 SwapCC = false;
20304 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
20305 std::swap(TrueVal, FalseVal);
20306 SwapCC = true;
20307 }
20308
20309 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
20310 return false;
20311
20312 SDValue A = FalseVal.getOperand(0);
20313 SDValue B = FalseVal.getOperand(1);
20314 // Add is commutative, so check both orders
20315 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
20316 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
20317}
20318
20319/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
20320/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
20321/// register pressure over the add followed by masked vsub sequence.
20323 SDLoc DL(N);
20324 EVT VT = N->getValueType(0);
20325 SDValue CC = N->getOperand(0);
20326 SDValue TrueVal = N->getOperand(1);
20327 SDValue FalseVal = N->getOperand(2);
20328
20329 bool SwapCC;
20330 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
20331 return SDValue();
20332
20333 SDValue Sub = SwapCC ? TrueVal : FalseVal;
20334 SDValue A = Sub.getOperand(0);
20335 SDValue B = Sub.getOperand(1);
20336
20337 // Arrange the select such that we can match a masked
20338 // vrsub.vi to perform the conditional negate
20339 SDValue NegB = DAG.getNegative(B, DL, VT);
20340 if (!SwapCC)
20341 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
20342 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
20343 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
20344}
20345
20347 const RISCVSubtarget &Subtarget) {
20348 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
20349 return Folded;
20350
20351 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
20352 return V;
20353
20354 if (Subtarget.hasConditionalMoveFusion())
20355 return SDValue();
20356
20357 SDValue TrueVal = N->getOperand(1);
20358 SDValue FalseVal = N->getOperand(2);
20359 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
20360 return V;
20361 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
20362}
20363
20364/// If we have a build_vector where each lane is binop X, C, where C
20365/// is a constant (but not necessarily the same constant on all lanes),
20366/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
20367/// We assume that materializing a constant build vector will be no more
20368/// expensive that performing O(n) binops.
20370 const RISCVSubtarget &Subtarget,
20371 const RISCVTargetLowering &TLI) {
20372 SDLoc DL(N);
20373 EVT VT = N->getValueType(0);
20374
20375 assert(!VT.isScalableVector() && "unexpected build vector");
20376
20377 if (VT.getVectorNumElements() == 1)
20378 return SDValue();
20379
20380 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
20381 if (!TLI.isBinOp(Opcode))
20382 return SDValue();
20383
20384 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
20385 return SDValue();
20386
20387 // This BUILD_VECTOR involves an implicit truncation, and sinking
20388 // truncates through binops is non-trivial.
20389 if (N->op_begin()->getValueType() != VT.getVectorElementType())
20390 return SDValue();
20391
20392 SmallVector<SDValue> LHSOps;
20393 SmallVector<SDValue> RHSOps;
20394 for (SDValue Op : N->ops()) {
20395 if (Op.isUndef()) {
20396 // We can't form a divide or remainder from undef.
20397 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
20398 return SDValue();
20399
20400 LHSOps.push_back(Op);
20401 RHSOps.push_back(Op);
20402 continue;
20403 }
20404
20405 // TODO: We can handle operations which have an neutral rhs value
20406 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
20407 // of profit in a more explicit manner.
20408 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
20409 return SDValue();
20410
20411 LHSOps.push_back(Op.getOperand(0));
20412 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
20413 !isa<ConstantFPSDNode>(Op.getOperand(1)))
20414 return SDValue();
20415 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
20416 // have different LHS and RHS types.
20417 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
20418 return SDValue();
20419
20420 RHSOps.push_back(Op.getOperand(1));
20421 }
20422
20423 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
20424 DAG.getBuildVector(VT, DL, RHSOps));
20425}
20426
20428 ElementCount OpEC = OpVT.getVectorElementCount();
20429 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
20430 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
20431}
20432
/// Given fixed length vectors A and B with equal element types, but possibly
/// different number of elements, return A + B where either A or B is zero
/// padded to the larger number of elements.
// NOTE(review): the declaration line (expected roughly:
//   static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
// per the call sites) was dropped by the source extraction -- restore it
// from upstream before compiling.
                             SelectionDAG &DAG) {
  // NOTE: Manually doing the extract/add/insert scheme produces
  // significantly better codegen than the naive pad with zeros
  // and add scheme.
  EVT AVT = A.getValueType();
  EVT BVT = B.getValueType();
  // NOTE(review): the guard that opens this braced region (an `if` that
  // swaps so A becomes the narrower vector — B must be at least as wide for
  // the extract below) was dropped by the source extraction -- verify.
    std::swap(A, B);
    std::swap(AVT, BVT);
  }

  // Add the narrow A into the low lanes of B, then write the sum back into
  // those low lanes; the high lanes of B pass through unchanged.
  SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
  SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
  return DAG.getInsertSubvector(DL, B, Res, 0);
}
20453
// Try to rewrite the operand of a vecreduce.add as a partial-reduce (4-way
// dot product) node, recursing through adds and zero/sign extends.
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue foldReduceOperandViaVDOTA4(SDValue InVec, const SDLoc &DL,
// per the recursive calls below) was dropped by the source extraction --
// restore it from upstream before compiling.
                                           SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget,
                                           const RISCVTargetLowering &TLI) {
  using namespace SDPatternMatch;
  // Note: We intentionally do not check the legality of the reduction type.
  // We want to handle the m4/m8 *src* types, and thus need to let illegal
  // intermediate types flow through here.
  if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
  // NOTE(review): the second half of this condition was dropped by the
  // source extraction -- verify against upstream.
    return SDValue();

  // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
  // form).
  SDValue A, B;
  if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
    SDValue AOpt = foldReduceOperandViaVDOTA4(A, DL, DAG, Subtarget, TLI);
    SDValue BOpt = foldReduceOperandViaVDOTA4(B, DL, DAG, Subtarget, TLI);
    if (AOpt || BOpt) {
      if (AOpt)
        A = AOpt;
      if (BOpt)
        B = BOpt;
      // From here, we're doing A + B with mixed types, implicitly zero
      // padded to the wider type. Note that we *don't* need the result
      // type to be the original VT, and in fact prefer narrower ones
      // if possible.
      return getZeroPaddedAdd(DL, A, B, DAG);
    }
  }

  // zext a <--> partial_reduce_umla 0, a, 1
  // sext a <--> partial_reduce_smla 0, a, 1
  if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
      InVec.getOpcode() == ISD::SIGN_EXTEND) {
    SDValue A = InVec.getOperand(0);
    EVT OpVT = A.getValueType();
    if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
      return SDValue();

    // Multiply by an all-ones splat: the dot product then just sums lanes.
    MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
    SDValue B = DAG.getConstant(0x1, DL, OpVT);
    bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
    unsigned Opc =
    // NOTE(review): the IsSigned-based opcode selection (signed vs. unsigned
    // partial-reduce, per the comments above) was dropped by the source
    // extraction -- restore it from upstream.
    return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
  }

  // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
  // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
  // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
  // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
  if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
    return SDValue();

  if (!ISD::isExtOpcode(A.getOpcode()))
    return SDValue();

  EVT OpVT = A.getOperand(0).getValueType();
  if (OpVT.getVectorElementType() != MVT::i8 ||
      OpVT != B.getOperand(0).getValueType() ||
      !TLI.isTypeLegal(A.getValueType()))
    return SDValue();

  // NOTE(review): each branch below originally assigned Opc to one of the
  // partial-reduce opcodes documented in the comment block above; those
  // assignment lines were dropped by the source extraction -- restore them
  // from upstream before compiling.
  unsigned Opc;
  if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
  else if (A.getOpcode() == ISD::ZERO_EXTEND &&
           B.getOpcode() == ISD::ZERO_EXTEND)
  else if (A.getOpcode() == ISD::SIGN_EXTEND &&
           B.getOpcode() == ISD::ZERO_EXTEND)
  else if (A.getOpcode() == ISD::ZERO_EXTEND &&
           B.getOpcode() == ISD::SIGN_EXTEND) {
    std::swap(A, B);
  } else
    return SDValue();

  MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
  return DAG.getNode(
      Opc, DL, ResVT,
      {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
}
20539
// Combine a VECREDUCE_ADD by rewriting its operand as a 4-way dot product
// when the Zvdot4a8i extension is available.
// NOTE(review): the first declaration line (a static combine taking
// SDNode *N, SelectionDAG &DAG, ...) was dropped by the source extraction --
// restore the exact name/signature from upstream before compiling.
                                         const RISCVSubtarget &Subtarget,
                                         const RISCVTargetLowering &TLI) {
  if (!Subtarget.hasStdExtZvdot4a8i())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue InVec = N->getOperand(0);
  // If the reduced operand can be expressed via partial reduces, reduce the
  // (narrower) partial-reduce result instead.
  if (SDValue V = foldReduceOperandViaVDOTA4(InVec, DL, DAG, Subtarget, TLI))
    return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
  return SDValue();
}
20553
// Combine INSERT_VECTOR_ELT: push the insert into the arms of a matching
// binop, or into the relevant source operand of a concat_vectors.
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// ) was dropped by the source extraction -- restore it from upstream.
                                           const RISCVSubtarget &Subtarget,
                                           const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    // NOTE(review): the constant check on InVecRHS that guards this bail-out
    // was dropped by the source extraction -- verify against upstream.
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    // NOTE(review): the two lines starting the INSERT_VECTOR_ELT nodes that
    // define LHS and RHS were dropped by the source extraction; only their
    // continuation lines remain below -- restore from upstream.
        InVecLHS, InValLHS, EltNo);
        InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  unsigned NewIdx = Elt % ConcatNumElts;

  // Insert into the sub-operand that owns lane Elt, then rebuild the concat.
  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps(InVec->ops());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
20619
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
// ) was dropped by the source extraction -- restore it from upstream.
                                   const RISCVSubtarget &Subtarget,
                                   const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors
  if (VT.isScalableVector())
    return SDValue();

  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided
  // NOTE(review): the declaration of `Lds` (a small vector of LoadSDNode*)
  // was dropped by the source extraction here -- restore from upstream.
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads
    Align = std::min(Align, Ld->getAlign());
  }

  // A PtrDiff holds either a constant byte distance or a stride SDValue, plus
  // a flag saying the stride must be negated.
  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride =
      std::holds_alternative<SDValue>(StrideVariant)
          ? std::get<SDValue>(StrideVariant)
          : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
                                  Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load
  // NOTE(review): the assignment of MemSize (an "unknown size" sentinel) and
  // the first line of the MachineMemOperand creation were dropped by the
  // source extraction around here -- restore from upstream.

      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getStridedLoadVP(
      WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
      AllOneMask,
      DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);

  // Preserve chain ordering for every original load we replaced.
  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
20749
// Combine VECTOR_SHUFFLE: recognize select-like shuffles over add/sub pairs
// (conditional negate), compress shuffles of shuffles, and narrow wide
// element types the target cannot shuffle directly.
// NOTE(review): the first declaration line (a static combine taking
// SDNode *N, SelectionDAG &DAG, ...) was dropped by the source extraction --
// restore it from upstream before compiling.
                                     const RISCVSubtarget &Subtarget,
                                     const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  const unsigned ElementSize = VT.getScalarSizeInBits();
  const unsigned NumElts = VT.getVectorNumElements();
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  // NOTE(review): the line casting N to a ShuffleVectorSDNode (defining SVN)
  // was dropped by the source extraction here -- restore from upstream.
  ArrayRef<int> Mask = SVN->getMask();
  MVT XLenVT = Subtarget.getXLenVT();

  // Recognized a disguised select of add/sub.
  bool SwapCC;
  if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
      matchSelectAddSub(V1, V2, SwapCC)) {
    SDValue Sub = SwapCC ? V1 : V2;
    SDValue A = Sub.getOperand(0);
    SDValue B = Sub.getOperand(1);

    // Rebuild the select mask as an i1 build_vector condition.
    SmallVector<SDValue> MaskVals;
    for (int MaskIndex : Mask) {
      bool SelectMaskVal = (MaskIndex < (int)NumElts);
      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    }
    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
    SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);

    // Arrange the select such that we can match a masked
    // vrsub.vi to perform the conditional negate
    SDValue NegB = DAG.getNegative(B, DL, VT);
    if (!SwapCC)
      CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
    SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
    return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
  }

  if (SDValue V = compressShuffleOfShuffles(SVN, Subtarget, DAG))
    return V;

  // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
  // during the combine phase before type legalization, and relies on
  // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
  // for the source mask.
  if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
      !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
      VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
    return SDValue();

  // Split each wide lane in two and shuffle at the narrower granularity.
  SmallVector<int, 8> NewMask;
  narrowShuffleMaskElts(2, Mask, NewMask);

  LLVMContext &C = *DAG.getContext();
  EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
  EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
  SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
                                     DAG.getBitcast(NewVT, V2), NewMask);
  return DAG.getBitcast(VT, Res);
}
20811
                               const RISCVSubtarget &Subtarget) {
  // Fold an add of a widening multiply into a widening multiply-accumulate:
  //   (add X, (vwmul[u|su]_vl Y, Z)) -> (vwmacc[u|su] Y, Z, X)
  // The accumulate opcode is obtained from getMAccOpcode below.
  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  // Only scalable vectors are handled here.
  if (N->getValueType(0).isFixedLengthVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);

  // For ADD_VL, the passthru operand must be undef since the macc node
  // produced below has no passthru to carry it.
  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddPassthruOp = N->getOperand(2);
    if (!AddPassthruOp.isUndef())
      return SDValue();
  }

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    case RISCVISD::VWMULU_VL:
    case RISCVISD::VWMULSU_VL:
      return true;
    default:
      return false;
    }
  };

  // The widening multiply may be on either side of the add; canonicalize it
  // into MulOp.
  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  // The multiply's own passthru must also be undef.
  SDValue MulPassthruOp = MulOp.getOperand(2);

  if (!MulPassthruOp.isUndef())
    return SDValue();

  // Determine the mask/VL the add executes under: defaults (all-ones mask,
  // VLMAX) for a plain ISD::ADD, explicit operands 3/4 for ADD_VL.
  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  // The add and the multiply must agree on mask and VL for the fold to be
  // sound.
  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();

  const auto &TSInfo =
      static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
  unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}
20876
                                  const RISCVSubtarget &Subtarget) {

  // Fold an add into the accumulator operand of a 4-way dot-product node:
  //   (add X, (vdota4* A, B, Acc, Mask, VL))
  //     -> (vdota4* A, B, (add_vl X, Acc, undef, Mask, VL), Mask, VL)
  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  if (!N->getValueType(0).isVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue DotOp = N->getOperand(1);

  // For ADD_VL, the passthru must be undef since it is discarded by the
  // fold.
  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddPassthruOp = N->getOperand(2);
    if (!AddPassthruOp.isUndef())
      return SDValue();
  }

  auto IsVdota4Opc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VDOTA4_VL:
    case RISCVISD::VDOTA4U_VL:
    case RISCVISD::VDOTA4SU_VL:
      return true;
    default:
      return false;
    }
  };

  // The dot-product may appear on either side of the add; canonicalize it
  // into DotOp.
  if (!IsVdota4Opc(DotOp.getOpcode()))
    std::swap(Addend, DotOp);

  if (!IsVdota4Opc(DotOp.getOpcode()))
    return SDValue();

  // Determine the mask/VL the add executes under: defaults for a plain
  // ISD::ADD, explicit operands 3/4 for ADD_VL.
  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulVL = DotOp.getOperand(4);
  if (AddVL != MulVL)
    return SDValue();

  // The add must be unmasked (an all-ones VMSET_VL over the same VL) so
  // that pre-adding into the accumulator cannot change masked-off lanes.
  if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
      AddMask.getOperand(0) != MulVL)
    return SDValue();

  // Pre-add the addend into the dot-product's accumulator operand.
  SDValue AccumOp = DotOp.getOperand(2);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
                       DAG.getUNDEF(VT), AddMask, AddVL);

  // Rebuild the dot-product with the combined accumulator.
  SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
                   DotOp.getOperand(3), DotOp->getOperand(4)};
  return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
}
20939
/// Canonicalize the index operand of a gather/scatter to the unsigned
/// addressing form RISC-V supports, sign-extending narrow signed indices to
/// XLenVT first. Returns true if Index/IndexType were updated.
static bool
                               ISD::MemIndexType &IndexType,
  // Only run before legalization so the possibly-illegal wide index type
  // created below can still be split by the generic legalizer.
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        EVT::getVectorVT(*DAG.getContext(), XLenVT,
                                         IndexVT.getVectorElementCount()),
                        Index);
  }
  // Once the index is XLenVT wide, signed and unsigned interpretations
  // compute the same address modulo 2^XLEN, so mark it unsigned.
  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}
20971
20972/// Match the index vector of a scatter or gather node as the shuffle mask
20973/// which performs the rearrangement if possible. Will only match if
20974/// all lanes are touched, and thus replacing the scatter or gather with
20975/// a unit strided access and shuffle is legal.
20976static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
20977 SmallVector<int> &ShuffleMask) {
20978 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20979 return false;
20980 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20981 return false;
20982
20983 const unsigned ElementSize = VT.getScalarStoreSize();
20984 const unsigned NumElems = VT.getVectorNumElements();
20985
20986 // Create the shuffle mask and check all bits active
20987 assert(ShuffleMask.empty());
20988 BitVector ActiveLanes(NumElems);
20989 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20990 // TODO: We've found an active bit of UB, and could be
20991 // more aggressive here if desired.
20992 if (Index->getOperand(i)->isUndef())
20993 return false;
20994 uint64_t C = Index->getConstantOperandVal(i);
20995 if (C % ElementSize != 0)
20996 return false;
20997 C = C / ElementSize;
20998 if (C >= NumElems)
20999 return false;
21000 ShuffleMask.push_back(C);
21001 ActiveLanes.set(C);
21002 }
21003 return ActiveLanes.all();
21004}
21005
21006/// Match the index of a gather or scatter operation as an operation
21007/// with twice the element width and half the number of elements. This is
21008/// generally profitable (if legal) because these operations are linear
21009/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
21010/// come out ahead.
21011static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
21012 Align BaseAlign, const RISCVSubtarget &ST) {
21013 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
21014 return false;
21015 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
21016 return false;
21017
21018 // Attempt a doubling. If we can use a element type 4x or 8x in
21019 // size, this will happen via multiply iterations of the transform.
21020 const unsigned NumElems = VT.getVectorNumElements();
21021 if (NumElems % 2 != 0)
21022 return false;
21023
21024 const unsigned ElementSize = VT.getScalarStoreSize();
21025 const unsigned WiderElementSize = ElementSize * 2;
21026 if (WiderElementSize > ST.getELen()/8)
21027 return false;
21028
21029 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
21030 return false;
21031
21032 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
21033 // TODO: We've found an active bit of UB, and could be
21034 // more aggressive here if desired.
21035 if (Index->getOperand(i)->isUndef())
21036 return false;
21037 // TODO: This offset check is too strict if we support fully
21038 // misaligned memory operations.
21039 uint64_t C = Index->getConstantOperandVal(i);
21040 if (i % 2 == 0) {
21041 if (C % WiderElementSize != 0)
21042 return false;
21043 continue;
21044 }
21045 uint64_t Last = Index->getConstantOperandVal(i-1);
21046 if (C != Last + ElementSize)
21047 return false;
21048 }
21049 return true;
21050}
21051
// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This would be benefit for the cases where X and Y are both the same value
// type of low precision vectors. Since the truncate would be lowered into
// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
// restriction, such pattern would be expanded into a series of "vsetvli"
// and "vnsrl" instructions later to reach this point.
  SDValue Mask = N->getOperand(1);
  SDValue VL = N->getOperand(2);

  // Only fire for an unmasked, whole-register truncate: VL is the all-ones
  // sentinel or the X0 register (both meaning VLMAX), and the mask is a
  // VMSET_VL over the same VL.
  bool IsVLMAX = isAllOnesConstant(VL) ||
                 (isa<RegisterSDNode>(VL) &&
                  cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
  if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
      Mask.getOperand(0) != VL)
    return SDValue();

  // A truncate layer belongs to the chain only if it uses the same mask/VL.
  auto IsTruncNode = [&](SDValue V) {
    return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
           V.getOperand(1) == Mask && V.getOperand(2) == VL;
  };

  SDValue Op = N->getOperand(0);

  // We need to first find the inner level of TRUNCATE_VECTOR_VL node
  // to distinguish such pattern.
  while (IsTruncNode(Op)) {
    if (!Op.hasOneUse())
      return SDValue();
    Op = Op.getOperand(0);
  }

  if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
    return SDValue();

  // The shifted value must be a single-use sign-extend, and the shift
  // amount a single-use zero-extend.
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
      N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
    return SDValue();

  // X and Y must share the narrow vector type that the truncate produces.
  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);
  if (!N00.getValueType().isVector() ||
      N00.getValueType() != N10.getValueType() ||
      N->getValueType(0) != N10.getValueType())
    return SDValue();

  // Clamp the shift amount to scalarsize-1: shifting the sign-extended
  // value by anything larger still yields all sign bits, which the clamped
  // narrow shift reproduces.
  unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
  SDValue SMin =
      DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
                  DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
  return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
}
21106
// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
// maximum value for the truncated type.
// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
// is the signed maximum value for the truncated type and C2 is the signed
// minimum value.
                                    const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);

  MVT VT = N->getSimpleValueType(0);

  SDValue Mask = N->getOperand(1);
  SDValue VL = N->getOperand(2);

  // Match a min/max (either the generic opcode Opc or the VL form OpcVL with
  // matching mask/VL and undef passthru) whose second operand is a constant
  // splat. On success, returns the non-constant operand and sets SplatVal;
  // otherwise returns an empty SDValue.
  auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
                                  APInt &SplatVal) {
    if (V.getOpcode() != Opc &&
        !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
          V.getOperand(3) == Mask && V.getOperand(4) == VL))
      return SDValue();

    SDValue Op = V.getOperand(1);

    // Peek through conversion between fixed and scalable vectors.
    if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
        isNullConstant(Op.getOperand(2)) &&
        Op.getOperand(1).getValueType().isFixedLengthVector() &&
        Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
        isNullConstant(Op.getOperand(1).getOperand(1)))
      Op = Op.getOperand(1).getOperand(0);

    if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
      return V.getOperand(0);

    // A VMV_V_X_VL of a scalar constant is also a splat; adjust the scalar
    // to the vector's element width.
    if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
        Op.getOperand(2) == VL) {
      if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        SplatVal =
            Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
        return V.getOperand(0);
      }
    }

    return SDValue();
  };

  SDLoc DL(N);

  // Detect clamps to the unsigned range of VT, which can use vnclipu
  // (TRUNCATE_VECTOR_VL_USAT).
  auto DetectUSatPattern = [&](SDValue V) {
    APInt LoC, HiC;

    // Simple case, V is a UMIN.
    if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
      if (HiC.isMask(VT.getScalarSizeInBits()))
        return UMinOp;

    // If we have an SMAX that removes negative numbers first, then we can match
    // SMIN instead of UMIN.
    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
      if (SDValue SMaxOp =
              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
          return SMinOp;

    // If we have an SMIN before an SMAX and the SMAX constant is less than or
    // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
    // first.
    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
      if (SDValue SMinOp =
              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
            HiC.uge(LoC))
          return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
                             V.getOperand(1), DAG.getUNDEF(V.getValueType()),
                             Mask, VL);

    return SDValue();
  };

  // Detect clamps to the signed range of VT, which can use vnclip
  // (TRUNCATE_VECTOR_VL_SSAT). The smin/smax pair may appear in either
  // order.
  auto DetectSSatPattern = [&](SDValue V) {
    unsigned NumDstBits = VT.getScalarSizeInBits();
    unsigned NumSrcBits = V.getScalarValueSizeInBits();
    APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
    APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);

    APInt HiC, LoC;
    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
      if (SDValue SMaxOp =
              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
        if (HiC == SignedMax && LoC == SignedMin)
          return SMaxOp;

    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
      if (SDValue SMinOp =
              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
        if (HiC == SignedMax && LoC == SignedMin)
          return SMinOp;

    return SDValue();
  };

  SDValue Src = N->getOperand(0);

  // Look through multiple layers of truncates.
  while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
         Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
         Src.hasOneUse())
    Src = Src.getOperand(0);

  SDValue Val;
  unsigned ClipOpc;
  if ((Val = DetectUSatPattern(Src)))
    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
  else if ((Val = DetectSSatPattern(Src)))
    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
  else
    return SDValue();

  MVT ValVT = Val.getSimpleValueType();

  // The saturating truncate only narrows SEW*2 -> SEW, so emit one per
  // halving until the destination type is reached.
  do {
    MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
    ValVT = ValVT.changeVectorElementType(ValEltVT);
    Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
  } while (ValVT != VT);

  return Val;
}
21236
// Convert
// (iX ctpop (bitcast (vXi1 A)))
// ->
// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
// and
// (iN reduce.add (zext (vXi1 A to vXiN))
// ->
// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
// FIXME: It's complicated to match all the variations of this after type
// legalization so we only handle the pre-type legalization pattern, but that
// requires the fixed vector type to be legal.
                              const RISCVSubtarget &Subtarget) {
  unsigned Opc = N->getOpcode();
         "Unexpected opcode");
  // Only scalar integer results are handled by this combine.
  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger())
    return SDValue();

  SDValue Src = N->getOperand(0);

  if (Opc == ISD::CTPOP) {
    // Peek through zero_extend. It doesn't change the count.
    if (Src.getOpcode() == ISD::ZERO_EXTEND)
      Src = Src.getOperand(0);

    // The popcount source must be a scalar bitcast of an i1 vector.
    if (Src.getOpcode() != ISD::BITCAST)
      return SDValue();
    Src = Src.getOperand(0);
  } else if (Opc == ISD::VECREDUCE_ADD) {
    // Only match reduce.add of a zero-extended i1 vector.
    if (Src.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
    Src = Src.getOperand(0);
  }

  EVT SrcEVT = Src.getValueType();
  if (!SrcEVT.isSimple())
    return SDValue();

  MVT SrcMVT = SrcEVT.getSimpleVT();
  // Make sure the input is an i1 vector.
  if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
    return SDValue();

  // See the FIXME above: pre-type-legalization matching requires the source
  // type to already be legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(SrcMVT))
    return SDValue();

  // Check that destination type is large enough to hold result without
  // overflow.
  if (Opc == ISD::VECREDUCE_ADD) {
    unsigned EltSize = SrcMVT.getScalarSizeInBits();
    unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
    unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
    unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
                            ? SrcMVT.getVectorNumElements()
                              VectorBitsMax, EltSize, MinSize);
    if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
      return SDValue();
  }

  // vcpop.m operates on scalable masks; move a fixed-length vector into its
  // scalable container first.
  MVT ContainerVT = SrcMVT;
  if (SrcMVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDLoc DL(N);
  auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);

  // VCPOP_VL yields an XLenVT count; adjust it to the combine's result type.
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
  return DAG.getZExtOrTrunc(Pop, DL, VT);
}
21313
                                 const RISCVSubtarget &Subtarget) {
  // (shl (zext x), y) -> (vwsll x, y)
  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
    return V;

  // (shl (sext x), C) -> (vwmulsu x, 1u << C)
  // (shl (zext x), C) -> (vwmulu x, 1u << C)

  // The widening-multiply forms are only created after DAG legalization.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // The extend must be single-use so it can be absorbed into the widening
  // multiply.
  SDValue LHS = N->getOperand(0);
  if (!LHS.hasOneUse())
    return SDValue();
  unsigned Opcode;
  switch (LHS.getOpcode()) {
  case ISD::SIGN_EXTEND:
  case RISCVISD::VSEXT_VL:
    Opcode = RISCVISD::VWMULSU_VL;
    break;
  case ISD::ZERO_EXTEND:
  case RISCVISD::VZEXT_VL:
    Opcode = RISCVISD::VWMULU_VL;
    break;
  default:
    return SDValue();
  }

  // The shift amount must be a constant: either a splat build_vector or a
  // VMV_V_X_VL of a scalar constant.
  SDValue RHS = N->getOperand(1);
  APInt ShAmt;
  uint64_t ShAmtInt;
  if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
    ShAmtInt = ShAmt.getZExtValue();
  else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
           RHS.getOperand(1).getOpcode() == ISD::Constant)
    ShAmtInt = RHS.getConstantOperandVal(1);
  else
    return SDValue();

  // Better foldings:
  // (shl (sext x), 1) -> (vwadd x, x)
  // (shl (zext x), 1) -> (vwaddu x, x)
  if (ShAmtInt <= 1)
    return SDValue();

  // The multiplier (1 << ShAmt) must fit in the narrow element type, and
  // the result type must be exactly twice the narrow width.
  SDValue NarrowOp = LHS.getOperand(0);
  MVT NarrowVT = NarrowOp.getSimpleValueType();
  uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
  if (ShAmtInt >= NarrowBits)
    return SDValue();
  MVT VT = N->getSimpleValueType(0);
  if (NarrowBits * 2 != VT.getScalarSizeInBits())
    return SDValue();

  // Collect passthru/mask/VL: defaults for a plain ISD::SHL, explicit
  // operands for SHL_VL.
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Passthru, Mask, VL;
  switch (N->getOpcode()) {
  case ISD::SHL:
    Passthru = DAG.getUNDEF(VT);
    std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
    break;
  case RISCVISD::SHL_VL:
    Passthru = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    break;
  default:
    llvm_unreachable("Expected SHL");
  }
  return DAG.getNode(Opcode, DL, VT, NarrowOp,
                     DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
                     Passthru, Mask, VL);
}
21390
21392 DAGCombinerInfo &DCI) const {
21393 SelectionDAG &DAG = DCI.DAG;
21394 const MVT XLenVT = Subtarget.getXLenVT();
21395 SDLoc DL(N);
21396
21397 // Helper to call SimplifyDemandedBits on an operand of N where only some low
21398 // bits are demanded. N will be added to the Worklist if it was not deleted.
21399 // Caller should return SDValue(N, 0) if this returns true.
21400 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
21401 SDValue Op = N->getOperand(OpNo);
21402 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
21403 if (!SimplifyDemandedBits(Op, Mask, DCI))
21404 return false;
21405
21406 if (N->getOpcode() != ISD::DELETED_NODE)
21407 DCI.AddToWorklist(N);
21408 return true;
21409 };
21410
21411 switch (N->getOpcode()) {
21412 default:
21413 break;
21414 case RISCVISD::SplitF64: {
21415 SDValue Op0 = N->getOperand(0);
21416 // If the input to SplitF64 is just BuildPairF64 then the operation is
21417 // redundant. Instead, use BuildPairF64's operands directly.
21418 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
21419 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
21420
21421 if (Op0->isUndef()) {
21422 SDValue Lo = DAG.getUNDEF(MVT::i32);
21423 SDValue Hi = DAG.getUNDEF(MVT::i32);
21424 return DCI.CombineTo(N, Lo, Hi);
21425 }
21426
21427 // It's cheaper to materialise two 32-bit integers than to load a double
21428 // from the constant pool and transfer it to integer registers through the
21429 // stack.
21431 APInt V = C->getValueAPF().bitcastToAPInt();
21432 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
21433 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
21434 return DCI.CombineTo(N, Lo, Hi);
21435 }
21436
21437 // This is a target-specific version of a DAGCombine performed in
21438 // DAGCombiner::visitBITCAST. It performs the equivalent of:
21439 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
21440 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
21441 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
21442 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
21443 break;
21444 SDValue NewSplitF64 =
21445 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
21446 Op0.getOperand(0));
21447 SDValue Lo = NewSplitF64.getValue(0);
21448 SDValue Hi = NewSplitF64.getValue(1);
21449 APInt SignBit = APInt::getSignMask(32);
21450 if (Op0.getOpcode() == ISD::FNEG) {
21451 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
21452 DAG.getConstant(SignBit, DL, MVT::i32));
21453 return DCI.CombineTo(N, Lo, NewHi);
21454 }
21455 assert(Op0.getOpcode() == ISD::FABS);
21456 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
21457 DAG.getConstant(~SignBit, DL, MVT::i32));
21458 return DCI.CombineTo(N, Lo, NewHi);
21459 }
21460 case RISCVISD::SLLW:
21461 case RISCVISD::SRAW:
21462 case RISCVISD::SRLW:
21463 case RISCVISD::RORW:
21464 case RISCVISD::ROLW: {
21465 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
21466 if (SimplifyDemandedLowBitsHelper(0, 32) ||
21467 SimplifyDemandedLowBitsHelper(1, 5))
21468 return SDValue(N, 0);
21469
21470 break;
21471 }
21472 case RISCVISD::ABSW:
21473 case RISCVISD::CLSW:
21474 case RISCVISD::CLZW:
21475 case RISCVISD::CTZW: {
21476 // Only the lower 32 bits of the first operand are read
21477 if (SimplifyDemandedLowBitsHelper(0, 32))
21478 return SDValue(N, 0);
21479 break;
21480 }
21481 case RISCVISD::WMULSU: {
21482 // Convert to MULHSU if only the upper half is used.
21483 if (!N->hasAnyUseOfValue(0)) {
21484 SDValue Res = DAG.getNode(RISCVISD::MULHSU, DL, N->getValueType(1),
21485 N->getOperand(0), N->getOperand(1));
21486 return DCI.CombineTo(N, Res, Res);
21487 }
21488 break;
21489 }
21490 case RISCVISD::ADDD: {
21491 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21492 "ADDD is only for RV32 with P extension");
21493
21494 SDValue Op0Lo = N->getOperand(0);
21495 SDValue Op0Hi = N->getOperand(1);
21496 SDValue Op1Lo = N->getOperand(2);
21497 SDValue Op1Hi = N->getOperand(3);
21498
21499 // (ADDD lo, hi, x, 0) -> (WADDAU lo, hi, x, 0)
21500 if (isNullConstant(Op1Hi)) {
21501 SDValue Result =
21502 DAG.getNode(RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21503 Op0Lo, Op0Hi, Op1Lo, DAG.getConstant(0, DL, MVT::i32));
21504 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21505 }
21506 // (ADDD x, 0, lo, hi) -> (WADDAU lo, hi, x, 0)
21507 if (isNullConstant(Op0Hi)) {
21508 SDValue Result =
21509 DAG.getNode(RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21510 Op1Lo, Op1Hi, Op0Lo, DAG.getConstant(0, DL, MVT::i32));
21511 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21512 }
21513 break;
21514 }
21515 case RISCVISD::SUBD: {
21516 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21517 "SUBD is only for RV32 with P extension");
21518
21519 SDValue Op0Lo = N->getOperand(0);
21520 SDValue Op0Hi = N->getOperand(1);
21521 SDValue Op1Lo = N->getOperand(2);
21522 SDValue Op1Hi = N->getOperand(3);
21523
21524 // (SUBD lo, hi, x, 0) -> (WSUBAU lo, hi, 0, x)
21525 // WSUBAU semantics: rd = rd + zext(rs1) - zext(rs2)
21526 if (isNullConstant(Op1Hi)) {
21527 SDValue Result =
21528 DAG.getNode(RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21529 Op0Lo, Op0Hi, DAG.getConstant(0, DL, MVT::i32), Op1Lo);
21530 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21531 }
21532 break;
21533 }
21534 case RISCVISD::WADDAU: {
21535 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21536 "WADDAU is only for RV32 with P extension");
21537 SDValue Op0Lo = N->getOperand(0);
21538 SDValue Op0Hi = N->getOperand(1);
21539 SDValue Op1 = N->getOperand(2);
21540 SDValue Op2 = N->getOperand(3);
21541
21542 // FIXME: Canonicalize zero Op1 to Op2.
21543 if (isNullConstant(Op2) && Op0Lo.getNode() == Op0Hi.getNode() &&
21544 Op0Lo.getResNo() == 0 && Op0Hi.getResNo() == 1 && Op0Lo.hasOneUse() &&
21545 Op0Hi.hasOneUse()) {
21546 // (WADDAU (WADDAU lo, hi, x, 0), y, 0) -> (WADDAU lo, hi, x, y)
21547 if (Op0Lo.getOpcode() == RISCVISD::WADDAU &&
21548 isNullConstant(Op0Lo.getOperand(3))) {
21549 SDValue Result = DAG.getNode(
21550 RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21551 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op0Lo.getOperand(2), Op1);
21552 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21553 }
21554 // (WADDAU (WSUBAU lo, hi, 0, a), b, 0) -> (WSUBAU lo, hi, b, a)
21555 if (Op0Lo.getOpcode() == RISCVISD::WSUBAU &&
21556 isNullConstant(Op0Lo.getOperand(2))) {
21557 SDValue Result = DAG.getNode(
21558 RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21559 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op1, Op0Lo.getOperand(3));
21560 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21561 }
21562 }
21563 break;
21564 }
21565 case RISCVISD::WSUBAU: {
21566 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21567 "WSUBAU is only for RV32 with P extension");
21568 SDValue Op0Lo = N->getOperand(0);
21569 SDValue Op0Hi = N->getOperand(1);
21570 SDValue Op1 = N->getOperand(2);
21571 SDValue Op2 = N->getOperand(3);
21572
21573 // (WSUBAU (WADDAU lo, hi, a, 0), 0, b) -> (WSUBAU lo, hi, a, b)
21574 if (isNullConstant(Op1) && Op0Lo.getOpcode() == RISCVISD::WADDAU &&
21575 Op0Lo.getNode() == Op0Hi.getNode() && Op0Lo.getResNo() == 0 &&
21576 Op0Hi.getResNo() == 1 && Op0Lo.hasOneUse() && Op0Hi.hasOneUse() &&
21577 isNullConstant(Op0Lo.getOperand(3))) {
21578 SDValue Result = DAG.getNode(
21579 RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21580 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op0Lo.getOperand(2), Op2);
21581 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21582 }
21583 break;
21584 }
21585 case RISCVISD::FMV_W_X_RV64: {
21586 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the
21587 // conversion is unnecessary and can be replaced with the
21588 // FMV_X_ANYEXTW_RV64 operand.
21589 SDValue Op0 = N->getOperand(0);
21590 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
21591 return Op0.getOperand(0);
21592 break;
21593 }
21594 case RISCVISD::FMV_X_ANYEXTH:
21595 case RISCVISD::FMV_X_ANYEXTW_RV64: {
21596 SDLoc DL(N);
21597 SDValue Op0 = N->getOperand(0);
21598 MVT VT = N->getSimpleValueType(0);
21599
21600 // Constant fold.
21601 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
21602 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
21603 return DAG.getConstant(Val, DL, VT);
21604 }
21605
21606 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
21607 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
21608 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
21609 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
21610 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
21611 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
21612 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
21613 assert(Op0.getOperand(0).getValueType() == VT &&
21614 "Unexpected value type!");
21615 return Op0.getOperand(0);
21616 }
21617
21618 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
21619 cast<LoadSDNode>(Op0)->isSimple()) {
21621 auto *LN0 = cast<LoadSDNode>(Op0);
21622 SDValue Load =
21623 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
21624 LN0->getBasePtr(), IVT, LN0->getMemOperand());
21625 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
21626 return Load;
21627 }
21628
21629 // This is a target-specific version of a DAGCombine performed in
21630 // DAGCombiner::visitBITCAST. It performs the equivalent of:
21631 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
21632 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
21633 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
21634 !Op0.getNode()->hasOneUse())
21635 break;
21636 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
21637 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
21638 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
21639 if (Op0.getOpcode() == ISD::FNEG)
21640 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
21641 DAG.getConstant(SignBit, DL, VT));
21642
21643 assert(Op0.getOpcode() == ISD::FABS);
21644 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
21645 DAG.getConstant(~SignBit, DL, VT));
21646 }
21647 case ISD::ABS: {
21648 EVT VT = N->getValueType(0);
21649 SDValue N0 = N->getOperand(0);
21650 // abs (sext) -> zext (abs)
21651 // abs (zext) -> zext (handled elsewhere)
21652 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
21653 SDValue Src = N0.getOperand(0);
21654 SDLoc DL(N);
21655 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
21656 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
21657 }
21658 break;
21659 }
21660 case ISD::ADD: {
21661 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21662 return V;
21663 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
21664 return V;
21665 if (SDValue V = combineVdota4Accum(N, DAG, Subtarget))
21666 return V;
21667 return performADDCombine(N, DCI, Subtarget);
21668 }
21669 case ISD::SUB: {
21670 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21671 return V;
21672 return performSUBCombine(N, DAG, Subtarget);
21673 }
21674 case ISD::AND:
21675 return performANDCombine(N, DCI, Subtarget);
21676 case ISD::OR: {
21677 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21678 return V;
21679 return performORCombine(N, DCI, Subtarget);
21680 }
21681 case ISD::XOR:
21682 return performXORCombine(N, DAG, Subtarget);
21683 case ISD::MUL:
21684 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21685 return V;
21686 return performMULCombine(N, DAG, DCI, Subtarget);
21687 case ISD::SDIV:
21688 case ISD::UDIV:
21689 case ISD::SREM:
21690 case ISD::UREM:
21691 if (SDValue V = combineBinOpOfZExt(N, DAG))
21692 return V;
21693 break;
21694 case ISD::FMUL: {
21695 using namespace SDPatternMatch;
21696 SDLoc DL(N);
21697 EVT VT = N->getValueType(0);
21698 SDValue X, Y;
21699 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
21700 // hoistFNegAboveFMulFDiv.
21701 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
21703 return DAG.getNode(ISD::FNEG, DL, VT,
21704 DAG.getNode(ISD::FMUL, DL, VT, X, Y, N->getFlags()),
21705 N->getFlags());
21706
21707 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
21708 SDValue N0 = N->getOperand(0);
21709 SDValue N1 = N->getOperand(1);
21710 if (N0->getOpcode() != ISD::FCOPYSIGN)
21711 std::swap(N0, N1);
21712 if (N0->getOpcode() != ISD::FCOPYSIGN)
21713 return SDValue();
21715 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
21716 return SDValue();
21717 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
21718 return SDValue();
21719 SDValue Sign = N0->getOperand(1);
21720 if (Sign.getValueType() != VT)
21721 return SDValue();
21722 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
21723 }
21724 case ISD::FADD:
21725 case ISD::UMAX:
21726 case ISD::UMIN:
21727 case ISD::SMAX:
21728 case ISD::SMIN:
21729 case ISD::FMAXNUM:
21730 case ISD::FMINNUM: {
21731 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
21732 return V;
21733 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
21734 return V;
21735 return SDValue();
21736 }
21737 case ISD::FMA: {
21738 SDValue N0 = N->getOperand(0);
21739 SDValue N1 = N->getOperand(1);
21740 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21741 std::swap(N0, N1);
21742 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21743 return SDValue();
21744 SDValue SplatN0 = N0.getOperand(0);
21745 if (SplatN0.getOpcode() != ISD::FNEG || !SplatN0.hasOneUse())
21746 return SDValue();
21747 EVT VT = N->getValueType(0);
21748 SDValue Splat =
21749 DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, SplatN0.getOperand(0));
21750 SDValue Fneg = DAG.getNode(ISD::FNEG, DL, VT, Splat);
21751 return DAG.getNode(ISD::FMA, DL, VT, Fneg, N1, N->getOperand(2));
21752 }
21753 case ISD::SETCC:
21754 return performSETCCCombine(N, DCI, Subtarget);
21756 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
21757 case ISD::ZERO_EXTEND:
21758 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
21759 // type legalization. This is safe because fp_to_uint produces poison if
21760 // it overflows.
21761 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
21762 SDValue Src = N->getOperand(0);
21763 if (Src.getOpcode() == ISD::FP_TO_UINT &&
21764 isTypeLegal(Src.getOperand(0).getValueType()))
21765 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
21766 Src.getOperand(0));
21767 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
21768 isTypeLegal(Src.getOperand(1).getValueType())) {
21769 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
21770 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
21771 Src.getOperand(0), Src.getOperand(1));
21772 DCI.CombineTo(N, Res);
21773 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
21774 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
21775 return SDValue(N, 0); // Return N so it doesn't get rechecked.
21776 }
21777 }
21778 return SDValue();
21779 case RISCVISD::TRUNCATE_VECTOR_VL:
21780 if (SDValue V = combineTruncOfSraSext(N, DAG))
21781 return V;
21782 return combineTruncToVnclip(N, DAG, Subtarget);
21783 case ISD::VP_TRUNCATE:
21784 return performVP_TRUNCATECombine(N, DAG, Subtarget);
21785 case ISD::TRUNCATE:
21786 return performTRUNCATECombine(N, DAG, Subtarget);
21787 case ISD::SELECT:
21788 return performSELECTCombine(N, DAG, Subtarget);
21789 case ISD::VSELECT:
21790 return performVSELECTCombine(N, DAG);
21791 case RISCVISD::CZERO_EQZ:
21792 case RISCVISD::CZERO_NEZ: {
21793 SDValue Val = N->getOperand(0);
21794 SDValue Cond = N->getOperand(1);
21795 MVT VT = N->getSimpleValueType(0);
21796
21797 unsigned Opc = N->getOpcode();
21798
21799 // czero_eqz x, x -> x
21800 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
21801 return Val;
21802
21803 unsigned InvOpc =
21804 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
21805
21806 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
21807 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
21808 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
21809 SDValue NewCond = Cond.getOperand(0);
21810 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
21811 if (DAG.MaskedValueIsZero(NewCond, Mask))
21812 return DAG.getNode(InvOpc, SDLoc(N), VT, Val, NewCond);
21813 }
21814 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
21815 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
21816 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
21817 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
21818 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
21819 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
21820 if (ISD::isIntEqualitySetCC(CCVal))
21821 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N), VT,
21822 Val, Cond.getOperand(0));
21823 }
21824
21825 // Remove SRL from bittest patterns (srl (and X, (1 << C)), C) if the and
21826 // is an ANDI. Because only 1 bit can be set after the AND, it doesn't
21827 // matter if we shift it.
21828 if (Cond.getOpcode() == ISD::SRL &&
21829 isa<ConstantSDNode>(Cond.getOperand(1)) &&
21830 Cond.getOperand(0).getOpcode() == ISD::AND) {
21831 const APInt &ShAmt = Cond.getConstantOperandAPInt(1);
21832 unsigned BitWidth = VT.getSizeInBits();
21833 SDValue And = Cond.getOperand(0);
21834 if (ShAmt.ult(BitWidth) && isa<ConstantSDNode>(And.getOperand(1))) {
21835 uint64_t AndConst = And.getConstantOperandVal(1);
21836 if (AndConst == (1ULL << ShAmt.getZExtValue()) && isInt<12>(AndConst))
21837 return DAG.getNode(Opc, DL, VT, Val, And);
21838 }
21839 }
21840
21841 // czero_nez (setcc X, Y, CC), (setcc X, Y, eq) -> (setcc X, Y, CC)
21842 // if CC is a strict inequality (lt, gt, ult, ugt), because when X == Y
21843 // the setcc result is already 0. The eq operands can be in either order.
21844 if (Opc == RISCVISD::CZERO_NEZ && Val.getOpcode() == ISD::SETCC &&
21845 Cond.getOpcode() == ISD::SETCC &&
21846 cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ) {
21847 ISD::CondCode ValCC = cast<CondCodeSDNode>(Val.getOperand(2))->get();
21848 bool SameOperands = (Val.getOperand(0) == Cond.getOperand(0) &&
21849 Val.getOperand(1) == Cond.getOperand(1)) ||
21850 (Val.getOperand(0) == Cond.getOperand(1) &&
21851 Val.getOperand(1) == Cond.getOperand(0));
21852 if (SameOperands && (ValCC == ISD::SETLT || ValCC == ISD::SETGT ||
21853 ValCC == ISD::SETULT || ValCC == ISD::SETUGT))
21854 return Val;
21855 }
21856
21857 return SDValue();
21858 }
21859 case RISCVISD::SELECT_CC: {
21860 // Transform
21861 SDValue LHS = N->getOperand(0);
21862 SDValue RHS = N->getOperand(1);
21863 SDValue CC = N->getOperand(2);
21864 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
21865 SDValue TrueV = N->getOperand(3);
21866 SDValue FalseV = N->getOperand(4);
21867 SDLoc DL(N);
21868 EVT VT = N->getValueType(0);
21869
21870 // If the True and False values are the same, we don't need a select_cc.
21871 if (TrueV == FalseV)
21872 return TrueV;
21873
21874 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
21875 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
21876 if (!Subtarget.hasShortForwardBranchIALU() && isa<ConstantSDNode>(TrueV) &&
21877 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
21878 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
21879 if (CCVal == ISD::CondCode::SETGE)
21880 std::swap(TrueV, FalseV);
21881
21882 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
21883 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
21884 // Only handle simm12, if it is not in this range, it can be considered as
21885 // register.
21886 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
21887 isInt<12>(TrueSImm - FalseSImm)) {
21888 SDValue SRA =
21889 DAG.getNode(ISD::SRA, DL, VT, LHS,
21890 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
21891 SDValue AND =
21892 DAG.getNode(ISD::AND, DL, VT, SRA,
21893 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
21894 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
21895 }
21896
21897 if (CCVal == ISD::CondCode::SETGE)
21898 std::swap(TrueV, FalseV);
21899 }
21900
21901 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21902 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
21903 {LHS, RHS, CC, TrueV, FalseV});
21904
21905 if (!Subtarget.hasConditionalMoveFusion()) {
21906 // (select c, -1, y) -> -c | y
21907 if (isAllOnesConstant(TrueV)) {
21908 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21909 SDValue Neg = DAG.getNegative(C, DL, VT);
21910 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
21911 }
21912 // (select c, y, -1) -> -!c | y
21913 if (isAllOnesConstant(FalseV)) {
21914 SDValue C =
21915 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21916 SDValue Neg = DAG.getNegative(C, DL, VT);
21917 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
21918 }
21919
21920 // (select c, 0, y) -> -!c & y
21921 if (isNullConstant(TrueV)) {
21922 SDValue C =
21923 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21924 SDValue Neg = DAG.getNegative(C, DL, VT);
21925 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
21926 }
21927 // (select c, y, 0) -> -c & y
21928 if (isNullConstant(FalseV)) {
21929 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21930 SDValue Neg = DAG.getNegative(C, DL, VT);
21931 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
21932 }
21933 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
21934 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
21935 if (((isOneConstant(FalseV) && LHS == TrueV &&
21936 CCVal == ISD::CondCode::SETNE) ||
21937 (isOneConstant(TrueV) && LHS == FalseV &&
21938 CCVal == ISD::CondCode::SETEQ)) &&
21939 isNullConstant(RHS)) {
21940 // freeze it to be safe.
21941 LHS = DAG.getFreeze(LHS);
21942 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
21943 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
21944 }
21945 }
21946
21947 // If both true/false are an xor with 1, pull through the select.
21948 // This can occur after op legalization if both operands are setccs that
21949 // require an xor to invert.
21950 // FIXME: Generalize to other binary ops with identical operand?
21951 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
21952 TrueV.getOperand(1) == FalseV.getOperand(1) &&
21953 isOneConstant(TrueV.getOperand(1)) &&
21954 TrueV.hasOneUse() && FalseV.hasOneUse()) {
21955 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
21956 TrueV.getOperand(0), FalseV.getOperand(0));
21957 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
21958 }
21959
21960 return SDValue();
21961 }
21962 case RISCVISD::BR_CC: {
21963 SDValue LHS = N->getOperand(1);
21964 SDValue RHS = N->getOperand(2);
21965 SDValue CC = N->getOperand(3);
21966 SDLoc DL(N);
21967
21968 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21969 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
21970 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
21971
21972 return SDValue();
21973 }
21974 case ISD::BITREVERSE:
21975 return performBITREVERSECombine(N, DAG, Subtarget);
21976 case ISD::FP_TO_SINT:
21977 case ISD::FP_TO_UINT:
21978 return performFP_TO_INTCombine(N, DCI, Subtarget);
21981 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
21982 case ISD::FCOPYSIGN: {
21983 EVT VT = N->getValueType(0);
21984 if (!VT.isVector())
21985 break;
21986 // There is a form of VFSGNJ which injects the negated sign of its second
21987 // operand. Try and bubble any FNEG up after the extend/round to produce
21988 // this optimized pattern. Avoid modifying cases where FP_ROUND and
21989 // TRUNC=1.
21990 SDValue In2 = N->getOperand(1);
21991 // Avoid cases where the extend/round has multiple uses, as duplicating
21992 // those is typically more expensive than removing a fneg.
21993 if (!In2.hasOneUse())
21994 break;
21995 if (In2.getOpcode() != ISD::FP_EXTEND &&
21996 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
21997 break;
21998 In2 = In2.getOperand(0);
21999 if (In2.getOpcode() != ISD::FNEG)
22000 break;
22001 SDLoc DL(N);
22002 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
22003 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
22004 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
22005 }
22006 case ISD::MGATHER: {
22007 const auto *MGN = cast<MaskedGatherSDNode>(N);
22008 const EVT VT = N->getValueType(0);
22009 SDValue Index = MGN->getIndex();
22010 SDValue ScaleOp = MGN->getScale();
22011 ISD::MemIndexType IndexType = MGN->getIndexType();
22012 assert(!MGN->isIndexScaled() &&
22013 "Scaled gather/scatter should not be formed");
22014
22015 SDLoc DL(N);
22016 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22017 return DAG.getMaskedGather(
22018 N->getVTList(), MGN->getMemoryVT(), DL,
22019 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
22020 MGN->getBasePtr(), Index, ScaleOp},
22021 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
22022
22023 if (narrowIndex(Index, IndexType, DAG))
22024 return DAG.getMaskedGather(
22025 N->getVTList(), MGN->getMemoryVT(), DL,
22026 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
22027 MGN->getBasePtr(), Index, ScaleOp},
22028 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
22029
22030 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
22031 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
22032 // The sequence will be XLenVT, not the type of Index. Tell
22033 // isSimpleVIDSequence this so we avoid overflow.
22034 if (std::optional<VIDSequence> SimpleVID =
22035 isSimpleVIDSequence(Index, Subtarget.getXLen());
22036 SimpleVID && SimpleVID->StepDenominator == 1) {
22037 const int64_t StepNumerator = SimpleVID->StepNumerator;
22038 const int64_t Addend = SimpleVID->Addend;
22039
22040 // Note: We don't need to check alignment here since (by assumption
22041 // from the existence of the gather), our offsets must be sufficiently
22042 // aligned.
22043
22044 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
22045 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
22046 assert(IndexType == ISD::UNSIGNED_SCALED);
22047 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
22048 DAG.getSignedConstant(Addend, DL, PtrVT));
22049
22050 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
22052 SDValue StridedLoad = DAG.getStridedLoadVP(
22053 VT, DL, MGN->getChain(), BasePtr,
22054 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
22055 EVL, MGN->getMemOperand());
22056 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
22057 MGN->getPassThru());
22058 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
22059 DL);
22060 }
22061 }
22062
22063 SmallVector<int> ShuffleMask;
22064 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
22065 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
22066 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
22067 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
22068 MGN->getMask(), DAG.getUNDEF(VT),
22069 MGN->getMemoryVT(), MGN->getMemOperand(),
22071 SDValue Shuffle =
22072 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
22073 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
22074 }
22075
22076 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
22077 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
22078 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
22079 SmallVector<SDValue> NewIndices;
22080 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
22081 NewIndices.push_back(Index.getOperand(i));
22082 EVT IndexVT = Index.getValueType()
22084 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
22085
22086 unsigned ElementSize = VT.getScalarStoreSize();
22087 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
22088 auto EltCnt = VT.getVectorElementCount();
22089 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
22090 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
22091 EltCnt.divideCoefficientBy(2));
22092 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
22093 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
22094 EltCnt.divideCoefficientBy(2));
22095 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
22096
22097 SDValue Gather =
22098 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
22099 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
22100 Index, ScaleOp},
22101 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
22102 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
22103 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
22104 }
22105 break;
22106 }
22107 case ISD::MSCATTER:{
22108 const auto *MSN = cast<MaskedScatterSDNode>(N);
22109 SDValue Index = MSN->getIndex();
22110 SDValue ScaleOp = MSN->getScale();
22111 ISD::MemIndexType IndexType = MSN->getIndexType();
22112 assert(!MSN->isIndexScaled() &&
22113 "Scaled gather/scatter should not be formed");
22114
22115 SDLoc DL(N);
22116 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22117 return DAG.getMaskedScatter(
22118 N->getVTList(), MSN->getMemoryVT(), DL,
22119 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
22120 Index, ScaleOp},
22121 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
22122
22123 if (narrowIndex(Index, IndexType, DAG))
22124 return DAG.getMaskedScatter(
22125 N->getVTList(), MSN->getMemoryVT(), DL,
22126 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
22127 Index, ScaleOp},
22128 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
22129
22130 EVT VT = MSN->getValue()->getValueType(0);
22131 SmallVector<int> ShuffleMask;
22132 if (!MSN->isTruncatingStore() &&
22133 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
22134 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
22135 DAG.getUNDEF(VT), ShuffleMask);
22136 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
22137 DAG.getUNDEF(XLenVT), MSN->getMask(),
22138 MSN->getMemoryVT(), MSN->getMemOperand(),
22139 ISD::UNINDEXED, false);
22140 }
22141 break;
22142 }
22143 case ISD::VP_GATHER: {
22144 const auto *VPGN = cast<VPGatherSDNode>(N);
22145 SDValue Index = VPGN->getIndex();
22146 SDValue ScaleOp = VPGN->getScale();
22147 ISD::MemIndexType IndexType = VPGN->getIndexType();
22148 assert(!VPGN->isIndexScaled() &&
22149 "Scaled gather/scatter should not be formed");
22150
22151 SDLoc DL(N);
22152 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22153 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
22154 {VPGN->getChain(), VPGN->getBasePtr(), Index,
22155 ScaleOp, VPGN->getMask(),
22156 VPGN->getVectorLength()},
22157 VPGN->getMemOperand(), IndexType);
22158
22159 if (narrowIndex(Index, IndexType, DAG))
22160 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
22161 {VPGN->getChain(), VPGN->getBasePtr(), Index,
22162 ScaleOp, VPGN->getMask(),
22163 VPGN->getVectorLength()},
22164 VPGN->getMemOperand(), IndexType);
22165
22166 break;
22167 }
22168 case ISD::VP_SCATTER: {
22169 const auto *VPSN = cast<VPScatterSDNode>(N);
22170 SDValue Index = VPSN->getIndex();
22171 SDValue ScaleOp = VPSN->getScale();
22172 ISD::MemIndexType IndexType = VPSN->getIndexType();
22173 assert(!VPSN->isIndexScaled() &&
22174 "Scaled gather/scatter should not be formed");
22175
22176 SDLoc DL(N);
22177 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22178 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
22179 {VPSN->getChain(), VPSN->getValue(),
22180 VPSN->getBasePtr(), Index, ScaleOp,
22181 VPSN->getMask(), VPSN->getVectorLength()},
22182 VPSN->getMemOperand(), IndexType);
22183
22184 if (narrowIndex(Index, IndexType, DAG))
22185 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
22186 {VPSN->getChain(), VPSN->getValue(),
22187 VPSN->getBasePtr(), Index, ScaleOp,
22188 VPSN->getMask(), VPSN->getVectorLength()},
22189 VPSN->getMemOperand(), IndexType);
22190 break;
22191 }
22192 case RISCVISD::SHL_VL:
22193 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
22194 return V;
22195 [[fallthrough]];
22196 case RISCVISD::SRA_VL:
22197 case RISCVISD::SRL_VL: {
22198 SDValue ShAmt = N->getOperand(1);
22199 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
22200 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
22201 SDLoc DL(N);
22202 SDValue VL = N->getOperand(4);
22203 EVT VT = N->getValueType(0);
22204 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
22205 ShAmt.getOperand(1), VL);
22206 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
22207 N->getOperand(2), N->getOperand(3), N->getOperand(4));
22208 }
22209 break;
22210 }
22211 case ISD::SRA:
22212 if (SDValue V = performSRACombine(N, DAG, Subtarget))
22213 return V;
22214 [[fallthrough]];
22215 case ISD::SRL:
22216 case ISD::SHL: {
22217 if (N->getOpcode() == ISD::SHL) {
22218 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
22219 return V;
22220 }
22221 SDValue ShAmt = N->getOperand(1);
22222 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
22223 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
22224 SDLoc DL(N);
22225 EVT VT = N->getValueType(0);
22226 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
22227 ShAmt.getOperand(1),
22228 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
22229 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
22230 }
22231 break;
22232 }
22233 case RISCVISD::ADD_VL:
22234 if (SDValue V = simplifyOp_VL(N))
22235 return V;
22236 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
22237 return V;
22238 if (SDValue V = combineVdota4Accum(N, DAG, Subtarget))
22239 return V;
22240 return combineToVWMACC(N, DAG, Subtarget);
22241 case RISCVISD::VWADDU_VL:
22242 return performVWABDACombine(N, DAG, Subtarget);
22243 case RISCVISD::VWADD_W_VL:
22244 case RISCVISD::VWADDU_W_VL:
22245 case RISCVISD::VWSUB_W_VL:
22246 case RISCVISD::VWSUBU_W_VL:
22247 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
22248 case RISCVISD::OR_VL:
22249 case RISCVISD::SUB_VL:
22250 case RISCVISD::MUL_VL:
22251 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
22252 case RISCVISD::VFMADD_VL:
22253 case RISCVISD::VFNMADD_VL:
22254 case RISCVISD::VFMSUB_VL:
22255 case RISCVISD::VFNMSUB_VL:
22256 case RISCVISD::STRICT_VFMADD_VL:
22257 case RISCVISD::STRICT_VFNMADD_VL:
22258 case RISCVISD::STRICT_VFMSUB_VL:
22259 case RISCVISD::STRICT_VFNMSUB_VL:
22260 return performVFMADD_VLCombine(N, DCI, Subtarget);
22261 case RISCVISD::FADD_VL:
22262 case RISCVISD::FSUB_VL:
22263 case RISCVISD::FMUL_VL:
22264 case RISCVISD::VFWADD_W_VL:
22265 case RISCVISD::VFWSUB_W_VL:
22266 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
22267 case ISD::LOAD:
22268 case ISD::STORE: {
22269 if (DCI.isAfterLegalizeDAG())
22270 if (SDValue V = performMemPairCombine(N, DCI))
22271 return V;
22272
22273 if (N->getOpcode() != ISD::STORE)
22274 break;
22275
22276 auto *Store = cast<StoreSDNode>(N);
22277 SDValue Chain = Store->getChain();
22278 EVT MemVT = Store->getMemoryVT();
22279 SDValue Val = Store->getValue();
22280 SDLoc DL(N);
22281
22282 bool IsScalarizable =
22283 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
22284 Store->isSimple() &&
22285 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
22286 isPowerOf2_64(MemVT.getSizeInBits()) &&
22287 MemVT.getSizeInBits() <= Subtarget.getXLen();
22288
22289 // If sufficiently aligned we can scalarize stores of constant vectors of
22290 // any power-of-two size up to XLen bits, provided that they aren't too
22291 // expensive to materialize.
22292 // vsetivli zero, 2, e8, m1, ta, ma
22293 // vmv.v.i v8, 4
22294 // vse64.v v8, (a0)
22295 // ->
22296 // li a1, 1028
22297 // sh a1, 0(a0)
22298 if (DCI.isBeforeLegalize() && IsScalarizable &&
22300 // Get the constant vector bits
22301 APInt NewC(Val.getValueSizeInBits(), 0);
22302 uint64_t EltSize = Val.getScalarValueSizeInBits();
22303 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
22304 if (Val.getOperand(i).isUndef())
22305 continue;
22306 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
22307 i * EltSize);
22308 }
22309 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
22310
22311 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
22312 true) <= 2 &&
22314 NewVT, *Store->getMemOperand())) {
22315 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
22316 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
22317 Store->getPointerInfo(), Store->getBaseAlign(),
22318 Store->getMemOperand()->getFlags());
22319 }
22320 }
22321
22322 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
22323 // vsetivli zero, 2, e16, m1, ta, ma
22324 // vle16.v v8, (a0)
22325 // vse16.v v8, (a1)
22326 if (auto *L = dyn_cast<LoadSDNode>(Val);
22327 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
22328 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
22329 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
22330 L->getMemoryVT() == MemVT) {
22331 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
22333 NewVT, *Store->getMemOperand()) &&
22335 NewVT, *L->getMemOperand())) {
22336 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
22337 L->getPointerInfo(), L->getBaseAlign(),
22338 L->getMemOperand()->getFlags());
22339 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
22340 Store->getPointerInfo(), Store->getBaseAlign(),
22341 Store->getMemOperand()->getFlags());
22342 }
22343 }
22344
22345 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
22346 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
22347 // any illegal types.
22348 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
22349 (DCI.isAfterLegalizeDAG() &&
22351 isNullConstant(Val.getOperand(1)))) &&
22352 Val.hasOneUse()) {
22353 SDValue Src = Val.getOperand(0);
22354 MVT VecVT = Src.getSimpleValueType();
22355 // VecVT should be scalable and memory VT should match the element type.
22356 if (!Store->isIndexed() && VecVT.isScalableVector() &&
22357 MemVT == VecVT.getVectorElementType()) {
22358 SDLoc DL(N);
22359 MVT MaskVT = getMaskTypeFor(VecVT);
22360 return DAG.getStoreVP(
22361 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
22362 DAG.getConstant(1, DL, MaskVT),
22363 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
22364 Store->getMemOperand(), Store->getAddressingMode(),
22365 Store->isTruncatingStore(), /*IsCompress*/ false);
22366 }
22367 }
22368
22369 break;
22370 }
22371 case ISD::SPLAT_VECTOR: {
22372 EVT VT = N->getValueType(0);
22373 // Only perform this combine on legal MVT types.
22374 if (!isTypeLegal(VT))
22375 break;
22376 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
22377 DAG, Subtarget))
22378 return Gather;
22379 break;
22380 }
22381 case ISD::BUILD_VECTOR:
22382 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
22383 return V;
22384 break;
22386 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
22387 return V;
22388 break;
22390 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
22391 return V;
22392 break;
22394 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
22395 return V;
22396 break;
22397 case RISCVISD::VFMV_V_F_VL: {
22398 const MVT VT = N->getSimpleValueType(0);
22399 SDValue Passthru = N->getOperand(0);
22400 SDValue Scalar = N->getOperand(1);
22401 SDValue VL = N->getOperand(2);
22402
22403 // If VL is 1, we can use vfmv.s.f.
22404 if (isOneConstant(VL))
22405 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
22406 break;
22407 }
22408 case RISCVISD::VMV_V_X_VL: {
22409 const MVT VT = N->getSimpleValueType(0);
22410 SDValue Passthru = N->getOperand(0);
22411 SDValue Scalar = N->getOperand(1);
22412 SDValue VL = N->getOperand(2);
22413
22414 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
22415 // scalar input.
22416 unsigned ScalarSize = Scalar.getValueSizeInBits();
22417 unsigned EltWidth = VT.getScalarSizeInBits();
22418 if (ScalarSize > EltWidth && Passthru.isUndef())
22419 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
22420 return SDValue(N, 0);
22421
22422 // If VL is 1 and the scalar value won't benefit from immediate, we can
22423 // use vmv.s.x.
22425 if (isOneConstant(VL) &&
22426 (!Const || Const->isZero() ||
22427 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
22428 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
22429
22430 break;
22431 }
22432 case RISCVISD::VFMV_S_F_VL: {
22433 SDValue Src = N->getOperand(1);
22434 // Try to remove vector->scalar->vector if the scalar->vector is inserting
22435 // into an undef vector.
22436 // TODO: Could use a vslide or vmv.v.v for non-undef.
22437 if (N->getOperand(0).isUndef() &&
22438 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22439 isNullConstant(Src.getOperand(1)) &&
22440 Src.getOperand(0).getValueType().isScalableVector()) {
22441 EVT VT = N->getValueType(0);
22442 SDValue EVSrc = Src.getOperand(0);
22443 EVT EVSrcVT = EVSrc.getValueType();
22445 // Widths match, just return the original vector.
22446 if (EVSrcVT == VT)
22447 return EVSrc;
22448 SDLoc DL(N);
22449 // Width is narrower, using insert_subvector.
22450 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
22451 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
22452 EVSrc,
22453 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
22454 }
22455 // Width is wider, using extract_subvector.
22456 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
22457 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
22458 }
22459 [[fallthrough]];
22460 }
22461 case RISCVISD::VMV_S_X_VL: {
22462 const MVT VT = N->getSimpleValueType(0);
22463 SDValue Passthru = N->getOperand(0);
22464 SDValue Scalar = N->getOperand(1);
22465 SDValue VL = N->getOperand(2);
22466
22467 // The vmv.s.x instruction copies the scalar integer register to element 0
22468 // of the destination vector register. If SEW < XLEN, the least-significant
22469 // bits are copied and the upper XLEN-SEW bits are ignored.
22470 unsigned ScalarSize = Scalar.getValueSizeInBits();
22471 unsigned EltWidth = VT.getScalarSizeInBits();
22472 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
22473 return SDValue(N, 0);
22474
22475 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
22476 Scalar.getOperand(0).getValueType() == N->getValueType(0))
22477 return Scalar.getOperand(0);
22478
22479 // Use M1 or smaller to avoid over constraining register allocation
22480 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
22481 if (M1VT.bitsLT(VT)) {
22482 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
22483 SDValue Result =
22484 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
22485 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
22486 return Result;
22487 }
22488
22489 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
22490 // higher would involve overly constraining the register allocator for
22491 // no purpose.
22492 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
22493 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
22494 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
22495 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
22496
22497 break;
22498 }
22499 case RISCVISD::VMV_X_S: {
22500 SDValue Vec = N->getOperand(0);
22501 MVT VecVT = N->getOperand(0).getSimpleValueType();
22502 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
22503 if (M1VT.bitsLT(VecVT)) {
22504 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
22505 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getValueType(0), Vec);
22506 }
22507 break;
22508 }
22512 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
22513 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
22514 switch (IntNo) {
22515 // By default we do not combine any intrinsic.
22516 default:
22517 return SDValue();
22518 case Intrinsic::riscv_vcpop:
22519 case Intrinsic::riscv_vcpop_mask:
22520 case Intrinsic::riscv_vfirst:
22521 case Intrinsic::riscv_vfirst_mask: {
22522 SDValue VL = N->getOperand(2);
22523 if (IntNo == Intrinsic::riscv_vcpop_mask ||
22524 IntNo == Intrinsic::riscv_vfirst_mask)
22525 VL = N->getOperand(3);
22526 if (!isNullConstant(VL))
22527 return SDValue();
22528 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
22529 SDLoc DL(N);
22530 EVT VT = N->getValueType(0);
22531 if (IntNo == Intrinsic::riscv_vfirst ||
22532 IntNo == Intrinsic::riscv_vfirst_mask)
22533 return DAG.getAllOnesConstant(DL, VT);
22534 return DAG.getConstant(0, DL, VT);
22535 }
22536 case Intrinsic::riscv_vsseg2_mask:
22537 case Intrinsic::riscv_vsseg3_mask:
22538 case Intrinsic::riscv_vsseg4_mask:
22539 case Intrinsic::riscv_vsseg5_mask:
22540 case Intrinsic::riscv_vsseg6_mask:
22541 case Intrinsic::riscv_vsseg7_mask:
22542 case Intrinsic::riscv_vsseg8_mask: {
22543 SDValue Tuple = N->getOperand(2);
22544 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
22545
22546 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
22547 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
22548 !Tuple.getOperand(0).isUndef())
22549 return SDValue();
22550
22551 SDValue Val = Tuple.getOperand(1);
22552 unsigned Idx = Tuple.getConstantOperandVal(2);
22553
22554 unsigned SEW = Val.getValueType().getScalarSizeInBits();
22555 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
22556 "Type mismatch without bitcast?");
22557 unsigned Stride = SEW / 8 * NF;
22558 unsigned Offset = SEW / 8 * Idx;
22559
22560 SDValue Ops[] = {
22561 /*Chain=*/N->getOperand(0),
22562 /*IntID=*/
22563 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
22564 /*StoredVal=*/Val,
22565 /*Ptr=*/
22566 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
22567 DAG.getConstant(Offset, DL, XLenVT)),
22568 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
22569 /*Mask=*/N->getOperand(4),
22570 /*VL=*/N->getOperand(5)};
22571
22572 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
22573 // Match getTgtMemIntrinsic for non-unit stride case
22574 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
22577 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
22578
22579 SDVTList VTs = DAG.getVTList(MVT::Other);
22580 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
22581 MMO);
22582 }
22583 }
22584 }
22585 case ISD::EXPERIMENTAL_VP_REVERSE:
22586 return performVP_REVERSECombine(N, DAG, Subtarget);
22587 case ISD::VP_STORE:
22588 return performVP_STORECombine(N, DAG, Subtarget);
22589 case ISD::BITCAST: {
22590 assert(Subtarget.useRVVForFixedLengthVectors());
22591 SDValue N0 = N->getOperand(0);
22592 EVT VT = N->getValueType(0);
22593 EVT SrcVT = N0.getValueType();
22594 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
22595 unsigned NF = VT.getRISCVVectorTupleNumFields();
22596 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
22597 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
22598 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
22599
22600 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
22601
22602 SDValue Result = DAG.getUNDEF(VT);
22603 for (unsigned i = 0; i < NF; ++i)
22604 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
22605 DAG.getTargetConstant(i, DL, MVT::i32));
22606 return Result;
22607 }
22608 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
22609 // type, widen both sides to avoid a trip through memory.
22610 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
22611 VT.isScalarInteger()) {
22612 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
22613 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
22614 Ops[0] = N0;
22615 SDLoc DL(N);
22616 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
22617 N0 = DAG.getBitcast(MVT::i8, N0);
22618 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
22619 }
22620
22621 return SDValue();
22622 }
22623 case ISD::VECREDUCE_ADD:
22624 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
22625 return V;
22626 [[fallthrough]];
22627 case ISD::CTPOP:
22628 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
22629 return V;
22630 break;
22631 case RISCVISD::VRGATHER_VX_VL: {
22632 // Note this assumes that out of bounds indices produce poison
22633 // and can thus be replaced without having to prove them inbounds..
22634 EVT VT = N->getValueType(0);
22635 SDValue Src = N->getOperand(0);
22636 SDValue Idx = N->getOperand(1);
22637 SDValue Passthru = N->getOperand(2);
22638 SDValue VL = N->getOperand(4);
22639
22640 // Warning: Unlike most cases we strip an insert_subvector, this one
22641 // does not require the first operand to be undef.
22642 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
22643 isNullConstant(Src.getOperand(2)))
22644 Src = Src.getOperand(1);
22645
22646 switch (Src.getOpcode()) {
22647 default:
22648 break;
22649 case RISCVISD::VMV_V_X_VL:
22650 case RISCVISD::VFMV_V_F_VL:
22651 // Drop a redundant vrgather_vx.
22652 // TODO: Remove the type restriction if we find a motivating
22653 // test case?
22654 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
22655 Src.getValueType() == VT)
22656 return Src;
22657 break;
22658 case RISCVISD::VMV_S_X_VL:
22659 case RISCVISD::VFMV_S_F_VL:
22660 // If this use only demands lane zero from the source vmv.s.x, and
22661 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
22662 // a vmv.v.x. Note that there can be other uses of the original
22663 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
22664 if (isNullConstant(Idx) && Passthru.isUndef() &&
22665 VL == Src.getOperand(2)) {
22666 unsigned Opc =
22667 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
22668 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
22669 VL);
22670 }
22671 break;
22672 }
22673 break;
22674 }
22675 case RISCVISD::TUPLE_EXTRACT: {
22676 EVT VT = N->getValueType(0);
22677 SDValue Tuple = N->getOperand(0);
22678 unsigned Idx = N->getConstantOperandVal(1);
22679 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
22680 break;
22681
22682 unsigned NF = 0;
22683 switch (Tuple.getConstantOperandVal(1)) {
22684 default:
22685 break;
22686 case Intrinsic::riscv_vlseg2_mask:
22687 case Intrinsic::riscv_vlseg3_mask:
22688 case Intrinsic::riscv_vlseg4_mask:
22689 case Intrinsic::riscv_vlseg5_mask:
22690 case Intrinsic::riscv_vlseg6_mask:
22691 case Intrinsic::riscv_vlseg7_mask:
22692 case Intrinsic::riscv_vlseg8_mask:
22693 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
22694 break;
22695 }
22696
22697 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
22698 break;
22699
22700 unsigned SEW = VT.getScalarSizeInBits();
22701 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
22702 "Type mismatch without bitcast?");
22703 unsigned Stride = SEW / 8 * NF;
22704 unsigned Offset = SEW / 8 * Idx;
22705
22706 SDValue Passthru = Tuple.getOperand(2);
22707 if (Passthru.isUndef())
22708 Passthru = DAG.getUNDEF(VT);
22709 else
22710 Passthru = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VT, Passthru,
22711 N->getOperand(1));
22712
22713 SDValue Ops[] = {
22714 /*Chain=*/Tuple.getOperand(0),
22715 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
22716 /*Passthru=*/Passthru,
22717 /*Ptr=*/
22718 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
22719 DAG.getConstant(Offset, DL, XLenVT)),
22720 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
22721 /*Mask=*/Tuple.getOperand(4),
22722 /*VL=*/Tuple.getOperand(5),
22723 /*Policy=*/Tuple.getOperand(6)};
22724
22725 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
22726 // Match getTgtMemIntrinsic for non-unit stride case
22727 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
22730 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
22731
22732 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
22734 Ops, MemVT, MMO);
22735 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
22736 return Result.getValue(0);
22737 }
22738 case RISCVISD::TUPLE_INSERT: {
22739 // tuple_insert tuple, undef, idx -> tuple
22740 if (N->getOperand(1).isUndef())
22741 return N->getOperand(0);
22742 break;
22743 }
22744 case RISCVISD::VMERGE_VL: {
22745 // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
22746 SDValue Mask = N->getOperand(0);
22747 SDValue True = N->getOperand(1);
22748 SDValue Passthru = N->getOperand(3);
22749 SDValue VL = N->getOperand(4);
22750
22751 // Fixed vectors are wrapped in scalable containers, unwrap them.
22752 using namespace SDPatternMatch;
22753 SDValue SubVec;
22754 if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
22755 Mask = SubVec;
22756
22757 if (!isOneOrOneSplat(Mask))
22758 break;
22759
22760 return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getValueType(0),
22761 Passthru, True, VL);
22762 }
22763 case RISCVISD::VMV_V_V_VL: {
22764 // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
22765 SDValue Passthru = N->getOperand(0);
22766 SDValue Src = N->getOperand(1);
22767 SDValue VL = N->getOperand(2);
22768
22769 // Fixed vectors are wrapped in scalable containers, unwrap them.
22770 using namespace SDPatternMatch;
22771 SDValue SubVec;
22772 if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
22773 Src = SubVec;
22774
22775 SDValue SplatVal = DAG.getSplatValue(Src, /*LegalTypes=*/true);
22776 if (!SplatVal)
22777 break;
22778 MVT VT = N->getSimpleValueType(0);
22779 return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
22780 Subtarget);
22781 }
22782 case RISCVISD::VSLIDEDOWN_VL:
22783 case RISCVISD::VSLIDEUP_VL:
22784 if (N->getOperand(1)->isUndef())
22785 return N->getOperand(0);
22786 break;
22787 case RISCVISD::VSLIDE1UP_VL:
22788 case RISCVISD::VFSLIDE1UP_VL: {
22789 using namespace SDPatternMatch;
22790 SDValue SrcVec;
22791 SDLoc DL(N);
22792 MVT VT = N->getSimpleValueType(0);
22793 // If the scalar we're sliding in was extracted from the first element of a
22794 // vector, we can use that vector as the passthru in a normal slideup of 1.
22795 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
22796 if (!N->getOperand(0).isUndef() ||
22797 !sd_match(N->getOperand(2),
22798 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
22799 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
22800 break;
22801
22802 MVT SrcVecVT = SrcVec.getSimpleValueType();
22803 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
22804 break;
22805 // Adapt the value type of source vector.
22806 if (SrcVecVT.isFixedLengthVector()) {
22807 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
22808 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
22809 }
22811 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
22812 else
22813 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
22814
22815 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
22816 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
22817 N->getOperand(4));
22818 }
22819 }
22820
22821 return SDValue();
22822}
22823
    EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  // Only scalar i32/i64 can be sign-extended with a single instruction below.
  if (XVT != MVT::i32 && XVT != MVT::i64)
    return false;

  // We can use sext.w for RV64 or an srai 31 on RV32.
  if (KeptBits == 32 || KeptBits == 64)
    return true;

  // With Zbb we can use sext.h/sext.b.
  // NOTE(review): the KeptBits == 8 arm pairs XVT == MVT::i64 with
  // !is64Bit() — confirm this combination is intentional.
  return Subtarget.hasStdExtZbb() &&
         ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
          KeptBits == 16);
}
22842
    const SDNode *N, CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();

  // LD/ST will optimize constant Offset extraction, so when AddNode is used by
  // LD/ST, it can still complete the folding optimization operation performed
  // above.
  auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
    for (SDNode *Use : X->users()) {
      // This use is the one we're on right now. Skip it
      if (Use == User || Use->getOpcode() == ISD::SELECT)
        continue;
        return false;
    }
    return true;
  };

  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    // A multi-use add is still worth commuting past when all of its other
    // users are loads/stores that can fold the resulting offset.
    if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
      return isUsedByLdSt(N0.getNode(), N);

    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));

    // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
    if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
        N->user_begin()->getOpcode() == ISD::ADD &&
        !isUsedByLdSt(*N->user_begin(), nullptr) &&
        !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
      return false;

    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getSignificantBits() <= 64 &&
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost =
          RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
                                     /*CompressionCost*/ true);
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
          /*CompressionCost*/ true);

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }

  // Commuting through a multi-use operand would duplicate the inner op.
  if (!N0->hasOneUse())
    return false;

  if (N0->getOpcode() == ISD::SIGN_EXTEND &&
      N0->getOperand(0)->getOpcode() == ISD::ADD &&
      !N0->getOperand(0)->hasOneUse())
    return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());

  return true;
}
22929
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Opcode = Op.getOpcode();
  if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.

  APInt ExpandedMask = Mask | ~DemandedBits;

  // A legal replacement mask must keep every bit of ShrunkMask and may only
  // add bits that the caller does not demand.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  // Rebuild the logic op with NewMask and report the combine to TLO.
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // And has a few special cases for zext.
  if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff), if zext.h exists use zext.h,
    // otherwise use SLLI + SRLI.
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);

    // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
    if (VT == MVT::i64) {
      APInt NewMask = APInt(64, 0xffffffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getSignificantBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
23015
// Evaluate the generalized bit-reverse (GREV) or generalized OR-combine
// (GORC) butterfly network on \p x for control value \p ShAmt. Each set bit
// of ShAmt enables one stage that swaps (GREV) or ORs together (GORC)
// adjacent groups of 1, 2, 4, 8, 16 or 32 bits. ShAmt == 7 corresponds to
// brev8/orc.b.
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  // Per-stage masks selecting the low half of each bit group.
  static const uint64_t StageMasks[6] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  uint64_t Val = x;
  for (unsigned Idx = 0; Idx < 6; ++Idx) {
    // Stage Idx is active only when the corresponding control bit is set.
    if (!(ShAmt & (1u << Idx)))
      continue;
    const uint64_t M = StageMasks[Idx];
    const unsigned Sh = 1u << Idx;
    // Exchange the two halves of every 2*Sh-bit group.
    uint64_t Swapped = ((Val & M) << Sh) | ((Val >> Sh) & M);
    // GORC keeps the original bits as well, turning the swap into an OR.
    Val = IsGORC ? (Swapped | Val) : Swapped;
  }

  return Val;
}
23034
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  // Each case below fills in bits that are provably zero/one in the result of
  // the corresponding RISC-V-specific node.
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case RISCVISD::VCPOP_VL: {
    // The popcount result cannot exceed the VL operand's maximum value.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
    Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
    break;
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.
    Known.One.clearAllBits();
    break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SLLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // Only the low 5 bits of the shift amount are read.
    Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SRLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SRAW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SHL_ADD: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    unsigned ShAmt = Op.getConstantOperandVal(1);
    Known <<= ShAmt;
    Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
    Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLSW: {
    // The upper 32 bits are ignored by the instruction, but ComputeNumSignBits
    // doesn't give us a way to ignore them. If there are fewer than 33 sign
    // bits in the input consider it as having no redundant sign bits. Otherwise
    // the lower bound of the result is NumSignBits-33. The maximum value of
    // the result is 31.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    unsigned MinRedundantSignBits = NumSignBits < 33 ? 0 : NumSignBits - 33;
    // Create a ConstantRange [MinRedundantSignBits, 32) and convert it to
    // KnownBits.
    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
                        APInt(BitWidth, 32));
    Known = Range.toKnownBits();
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // To compute zeros for ORC_B, we need to invert the value and invert it
    // back after. This inverting is harmless for BREV8.
    Known.Zero =
        ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
    // When min == max, VLENB is a compile-time constant power of two.
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case RISCVISD::FCLASS: {
    // fclass will only set one of the low 10 bits.
    Known.Zero.setBitsFrom(10);
    break;
  }
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      // The vsetvli result is bounded by VLMAX (and by a constant AVL when
      // one is present).
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      RISCVVType::VLMUL VLMUL =
          static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
      unsigned SEW = RISCVVType::decodeVSEW(VSEW);
      auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
      uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
      if (BitWidth > KnownZeroFirstBit)
        Known.Zero.setBitsFrom(KnownZeroFirstBit);
      break;
    }
    }
    break;
  }
  }
}
23224
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    // The result carries as many sign bits as both select arms guarantee.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // Output is either all zero or operand 0. We can propagate sign bit count
    // from operand 0.
    return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case RISCVISD::NEGW_MAX: {
    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp < 33) return 1;
    return 33;
  }
  case RISCVISD::SRAW: {
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // sraw produces at least 33 sign bits. If the input already has more than
    // 33 sign bits, sraw will preserve them.
    // TODO: A more precise answer could be calculated depending on known bits
    // in the shift amount.
    return std::max(Tmp, 33U);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::ABSW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct.
    return 33;
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg:
    case Intrinsic::riscv_masked_atomicrmw_add:
    case Intrinsic::riscv_masked_atomicrmw_sub:
    case Intrinsic::riscv_masked_atomicrmw_nand:
    case Intrinsic::riscv_masked_atomicrmw_max:
    case Intrinsic::riscv_masked_atomicrmw_min:
    case Intrinsic::riscv_masked_atomicrmw_umax:
    case Intrinsic::riscv_masked_atomicrmw_umin:
    case Intrinsic::riscv_masked_cmpxchg:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.hasStdExtZalrsc());
      return Op.getValueSizeInBits() - 31;
    }
    break;
  }
  }

  // Conservative default: at least the sign bit itself is known.
  return 1;
}
23316
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();

  switch (Op.getOpcode()) {
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    KnownBits Known2;
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // For BREV8, we need to do BREV8 on the demanded bits.
    // For ORC_B, any bit in the output demands all bits from the same byte.
    // So we need to do ORC_B on the demanded bits.
        APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
                                          7, IsGORC));
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
                             OriginalDemandedElts, Known2, TLO, Depth + 1))
      return true;

    // To compute zeros for ORC_B, we need to invert the value and invert it
    // back after. This inverting is harmless for BREV8.
    Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
    return false;
  }
  }

      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
23349
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW:
    // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
    // amount is in bounds.
    return false;
  case RISCVISD::SELECT_CC:
    // Integer comparisons cannot create poison.
    assert(Op.getOperand(0).getValueType().isInteger() &&
           "RISCVISD::SELECT_CC only compares integers");
    return false;
  }
      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}
23373
// Return the Constant a load reads, when the load is a normal (non-extending,
// non-indexed) load whose address is a zero-offset, non-machine constant-pool
// entry reached either through an LLA node or a HI/ADD_LO pair; otherwise
// return nullptr.
// NOTE(review): extraction dropped line 23375 (the
// RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *) declaration
// tail) from this view.
23374 const Constant *
23376 assert(Ld && "Unexpected null LoadSDNode");
23377 if (!ISD::isNormalLoad(Ld))
23378 return nullptr;
23379
23380 SDValue Ptr = Ld->getBasePtr();
23381
23382 // Only constant pools with no offset are supported.
23383 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
23384 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
23385 if (!CNode || CNode->isMachineConstantPoolEntry() ||
23386 CNode->getOffset() != 0)
23387 return nullptr;
23388
23389 return CNode;
23390 };
23391
23392 // Simple case, LLA.
23393 if (Ptr.getOpcode() == RISCVISD::LLA) {
23394 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
23395 if (!CNode || CNode->getTargetFlags() != 0)
23396 return nullptr;
23397
23398 return CNode->getConstVal();
23399 }
23400
23401 // Look for a HI and ADD_LO pair.
23402 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
23403 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
23404 return nullptr;
23405
23406 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1))
23407 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
23408
// Both halves must carry the matching %lo/%hi relocation flags and refer to
// the very same constant-pool entry.
23409 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
23410 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
23411 return nullptr;
23412
23413 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
23414 return nullptr;
23415
23416 return CNodeLo->getConstVal();
23417}
23418
// Custom inserter for RISCV::ReadCounterWide (RV32 only): expands the pseudo
// into the classic hi/lo/hi CSR read loop that retries if the high half
// changed between reads, splitting BB into a LoopMBB and a DoneMBB.
// NOTE(review): extraction dropped lines 23419 (declaration head), 23437
// (iterator 'It' definition), 23448 (successor/PHI transfer to DoneMBB),
// 23452 (RegInfo definition) and 23460 (TII definition) from this view.
23420 MachineBasicBlock *BB) {
23421 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
23422
23423 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
23424 // Should the count have wrapped while it was being read, we need to try
23425 // again.
23426 // For example:
23427 // ```
23428 // read:
23429 // csrrs x3, counterh # load high word of counter
23430 // csrrs x2, counter # load low word of counter
23431 // csrrs x4, counterh # load high word of counter
23432 // bne x3, x4, read # check if high word reads match, otherwise try again
23433 // ```
23434
23435 MachineFunction &MF = *BB->getParent();
23436 const BasicBlock *LLVMBB = BB->getBasicBlock();
23438
23439 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
23440 MF.insert(It, LoopMBB);
23441
23442 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
23443 MF.insert(It, DoneMBB);
23444
23445 // Transfer the remainder of BB and its successor edges to DoneMBB.
23446 DoneMBB->splice(DoneMBB->begin(), BB,
23447 std::next(MachineBasicBlock::iterator(MI)), BB->end());
23449
23450 BB->addSuccessor(LoopMBB);
23451
23453 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23454 Register LoReg = MI.getOperand(0).getReg();
23455 Register HiReg = MI.getOperand(1).getReg();
23456 int64_t LoCounter = MI.getOperand(2).getImm();
23457 int64_t HiCounter = MI.getOperand(3).getImm();
23458 DebugLoc DL = MI.getDebugLoc();
23459
// CSRRS with rs1 = x0 is a pure CSR read with no side effect on the CSR.
23461 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
23462 .addImm(HiCounter)
23463 .addReg(RISCV::X0);
23464 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
23465 .addImm(LoCounter)
23466 .addReg(RISCV::X0);
23467 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
23468 .addImm(HiCounter)
23469 .addReg(RISCV::X0);
23470
// Retry the whole read if the two high-half samples disagree (wrap detected).
23471 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
23472 .addReg(HiReg)
23473 .addReg(ReadAgainReg)
23474 .addMBB(LoopMBB);
23475
23476 LoopMBB->addSuccessor(LoopMBB);
23477 LoopMBB->addSuccessor(DoneMBB);
23478
23479 MI.eraseFromParent();
23480
23481 return DoneMBB;
23482}
23483
// Custom inserter for RISCV::SplitF64Pseudo: spills the f64 source register to
// a dedicated frame slot, then reloads it as two 32-bit GPR halves (LW at
// offsets 0 and 4, swapped for big-endian).
// NOTE(review): extraction dropped lines 23484-23485 (declaration head) and
// 23503-23505 (the MachineMemOperand construction, including MMOHi) from
// this view.
23486 const RISCVSubtarget &Subtarget) {
23487 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
23488
23489 MachineFunction &MF = *BB->getParent();
23490 DebugLoc DL = MI.getDebugLoc();
23491 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
23492 Register LoReg = MI.getOperand(0).getReg();
23493 Register HiReg = MI.getOperand(1).getReg();
23494 Register SrcReg = MI.getOperand(2).getReg();
23495
// The round-trip goes through a per-function scratch slot reserved for f64
// moves between the FP and integer register files.
23496 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
23497 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
23498
23499 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
23500 Register());
23502 MachineMemOperand *MMOLo =
23506
23507 // For big-endian, the high part is at offset 0 and the low part at offset 4.
23508 if (!Subtarget.isLittleEndian())
23509 std::swap(LoReg, HiReg);
23510
23511 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
23512 .addFrameIndex(FI)
23513 .addImm(0)
23514 .addMemOperand(MMOLo);
23515 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
23516 .addFrameIndex(FI)
23517 .addImm(4)
23518 .addMemOperand(MMOHi);
23519 MI.eraseFromParent(); // The pseudo instruction is gone now.
23520 return BB;
23521}
23522
// Custom inserter for RISCV::BuildPairF64Pseudo: the inverse of SplitF64 —
// stores the two 32-bit GPR halves to the f64 scratch slot (SW at offsets 0
// and 4, swapped for big-endian) and reloads the slot as one FPR64 value.
// NOTE(review): extraction dropped lines 23523-23524 (declaration head) and
// 23543-23545 (the MachineMemOperand construction, including MMOHi) from
// this view.
23525 const RISCVSubtarget &Subtarget) {
23526 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
23527 "Unexpected instruction");
23528
23529 MachineFunction &MF = *BB->getParent();
23530 DebugLoc DL = MI.getDebugLoc();
23531 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
23532 Register DstReg = MI.getOperand(0).getReg();
23533 Register LoReg = MI.getOperand(1).getReg();
23534 Register HiReg = MI.getOperand(2).getReg();
23535 bool KillLo = MI.getOperand(1).isKill();
23536 bool KillHi = MI.getOperand(2).isKill();
23537
23538 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
23539 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
23540
23542 MachineMemOperand *MMOLo =
23546
23547 // For big-endian, store the high part at offset 0 and the low part at
23548 // offset 4.
23549 if (!Subtarget.isLittleEndian()) {
23550 std::swap(LoReg, HiReg);
23551 std::swap(KillLo, KillHi);
23552 }
23553
23554 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
23555 .addReg(LoReg, getKillRegState(KillLo))
23556 .addFrameIndex(FI)
23557 .addImm(0)
23558 .addMemOperand(MMOLo);
23559 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
23560 .addReg(HiReg, getKillRegState(KillHi))
23561 .addFrameIndex(FI)
23562 .addImm(4)
23563 .addMemOperand(MMOHi);
23564 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
23565 MI.eraseFromParent(); // The pseudo instruction is gone now.
23566 return BB;
23567}
23568
// Custom inserter for the PseudoQuietFLE/FLT family: performs the (signaling)
// relational compare, then restores the pre-compare FFLAGS so the quiet
// semantics are preserved, and finally issues a dummy FEQ into x0 whose only
// purpose is to raise the invalid-operation exception for signaling NaNs.
// NOTE(review): extraction dropped lines 23569 (declaration head), 23576/23578
// (MRI and TII definitions) and 23586-23587/23597-23598 (the NoFPExcept-flag
// propagation onto MIB and MIB2) from this view.
23570 unsigned RelOpcode, unsigned EqOpcode,
23571 const RISCVSubtarget &Subtarget) {
23572 DebugLoc DL = MI.getDebugLoc();
23573 Register DstReg = MI.getOperand(0).getReg();
23574 Register Src1Reg = MI.getOperand(1).getReg();
23575 Register Src2Reg = MI.getOperand(2).getReg();
23577 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23579
23580 // Save the current FFLAGS.
23581 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
23582
23583 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
23584 .addReg(Src1Reg)
23585 .addReg(Src2Reg);
23588
23589 // Restore the FFLAGS.
23590 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
23591 .addReg(SavedFFlags, RegState::Kill);
23592
23593 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
23594 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
23595 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
23596 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
23599
23600 // Erase the pseudoinstruction.
23601 MI.eraseFromParent();
23602 return BB;
23603}
23604
// Lower two cascaded Select_FPRX_ pseudos (Second consumes First's result as
// its false value, with identical condition operands) into a single shared
// diamond of blocks with one three-input PHI, instead of two stacked
// triangles.
// NOTE(review): extraction dropped lines 23606 (the parameter list head
// naming First/Second) and 23658 (the splice start iterator) from this view.
23605 static MachineBasicBlock *
23607 MachineBasicBlock *ThisMBB,
23608 const RISCVSubtarget &Subtarget) {
23609 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
23610 // Without this, custom-inserter would have generated:
23611 //
23612 // A
23613 // | \
23614 // | B
23615 // | /
23616 // C
23617 // | \
23618 // | D
23619 // | /
23620 // E
23621 //
23622 // A: X = ...; Y = ...
23623 // B: empty
23624 // C: Z = PHI [X, A], [Y, B]
23625 // D: empty
23626 // E: PHI [X, C], [Z, D]
23627 //
23628 // If we lower both Select_FPRX_ in a single step, we can instead generate:
23629 //
23630 // A
23631 // | \
23632 // | C
23633 // | /|
23634 // |/ |
23635 // | |
23636 // | D
23637 // | /
23638 // E
23639 //
23640 // A: X = ...; Y = ...
23641 // D: empty
23642 // E: PHI [X, A], [X, C], [Y, D]
23643
23644 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
23645 const DebugLoc &DL = First.getDebugLoc();
23646 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
23647 MachineFunction *F = ThisMBB->getParent();
23648 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
23649 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
23650 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
23651 MachineFunction::iterator It = ++ThisMBB->getIterator();
23652 F->insert(It, FirstMBB);
23653 F->insert(It, SecondMBB);
23654 F->insert(It, SinkMBB);
23655
23656 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
23657 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
23659 ThisMBB->end());
23660 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
23661
23662 // Fallthrough block for ThisMBB.
23663 ThisMBB->addSuccessor(FirstMBB);
23664 // Fallthrough block for FirstMBB.
23665 FirstMBB->addSuccessor(SecondMBB);
23666 ThisMBB->addSuccessor(SinkMBB);
23667 FirstMBB->addSuccessor(SinkMBB);
23668 // This is fallthrough.
23669 SecondMBB->addSuccessor(SinkMBB);
23670
23671 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
23672 Register FLHS = First.getOperand(1).getReg();
23673 Register FRHS = First.getOperand(2).getReg();
23674 // Insert appropriate branch.
23675 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
23676 .addReg(FLHS)
23677 .addReg(FRHS)
23678 .addMBB(SinkMBB);
23679
23680 Register SLHS = Second.getOperand(1).getReg();
23681 Register SRHS = Second.getOperand(2).getReg();
23682 Register Op1Reg4 = First.getOperand(4).getReg();
23683 Register Op1Reg5 = First.getOperand(5).getReg();
23684
23685 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
23686 // Insert appropriate branch.
23687 BuildMI(ThisMBB, DL,
23688 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
23689 .addReg(SLHS)
23690 .addReg(SRHS)
23691 .addMBB(SinkMBB);
23692
// The single merged PHI: Second's true value arrives from ThisMBB, First's
// true value from FirstMBB, and First's false value from SecondMBB.
23693 Register DestReg = Second.getOperand(0).getReg();
23694 Register Op2Reg4 = Second.getOperand(4).getReg();
23695 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
23696 .addReg(Op2Reg4)
23697 .addMBB(ThisMBB)
23698 .addReg(Op1Reg4)
23699 .addMBB(FirstMBB)
23700 .addReg(Op1Reg5)
23701 .addMBB(SecondMBB);
23702
23703 // Now remove the Select_FPRX_s.
23704 First.eraseFromParent();
23705 Second.eraseFromParent();
23706 return SinkMBB;
23707}
23708
// Custom inserter for the Select_* pseudos: materializes the select as a
// branch triangle (HeadMBB -> IfFalseMBB -> TailMBB), batching a run of
// selects that share the exact same condition into one triangle with one PHI
// per select. Cascaded FP selects are diverted to EmitLoweredCascadedSelect.
// NOTE(review): extraction dropped lines 23709-23710 (the emitSelectPseudo
// declaration head) and 23795 (the insertion iterator 'I' definition) from
// this view.
23711 const RISCVSubtarget &Subtarget) {
23712 // To "insert" Select_* instructions, we actually have to insert the triangle
23713 // control-flow pattern. The incoming instructions know the destination vreg
23714 // to set, the condition code register to branch on, the true/false values to
23715 // select between, and the condcode to use to select the appropriate branch.
23716 //
23717 // We produce the following control flow:
23718 // HeadMBB
23719 // | \
23720 // | IfFalseMBB
23721 // | /
23722 // TailMBB
23723 //
23724 // When we find a sequence of selects we attempt to optimize their emission
23725 // by sharing the control flow. Currently we only handle cases where we have
23726 // multiple selects with the exact same condition (same LHS, RHS and CC).
23727 // The selects may be interleaved with other instructions if the other
23728 // instructions meet some requirements we deem safe:
23729 // - They are not pseudo instructions.
23730 // - They are debug instructions. Otherwise,
23731 // - They do not have side-effects, do not access memory and their inputs do
23732 // not depend on the results of the select pseudo-instructions.
23733 // - They don't adjust stack.
23734 // The TrueV/FalseV operands of the selects cannot depend on the result of
23735 // previous selects in the sequence.
23736 // These conditions could be further relaxed. See the X86 target for a
23737 // related approach and more information.
23738 //
23739 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
23740 // is checked here and handled by a separate function -
23741 // EmitLoweredCascadedSelect.
23742
23743 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
23744 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
23745 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
23746 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
23747 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
23748 Next->getOperand(5).isKill())
23749 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
23750
23751 Register LHS = MI.getOperand(1).getReg();
23752 Register RHS;
23753 if (MI.getOperand(2).isReg())
23754 RHS = MI.getOperand(2).getReg();
23755 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
23756
23757 SmallVector<MachineInstr *, 4> SelectDebugValues;
23758 SmallSet<Register, 4> SelectDests;
23759 SelectDests.insert(MI.getOperand(0).getReg());
23760
23761 MachineInstr *LastSelectPseudo = &MI;
23762 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
23763
// Scan forward for further selects with the identical condition, stopping at
// the first instruction that is unsafe to hoist past or that consumes a
// prior select's result.
23764 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
23765 SequenceMBBI != E; ++SequenceMBBI) {
23766 if (SequenceMBBI->isDebugInstr())
23767 continue;
23768 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
23769 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
23770 !SequenceMBBI->getOperand(2).isReg() ||
23771 SequenceMBBI->getOperand(2).getReg() != RHS ||
23772 SequenceMBBI->getOperand(3).getImm() != CC ||
23773 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
23774 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
23775 break;
23776 LastSelectPseudo = &*SequenceMBBI;
23777 SequenceMBBI->collectDebugValues(SelectDebugValues);
23778 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
23779 continue;
23780 }
23781 if (SequenceMBBI->hasUnmodeledSideEffects() ||
23782 SequenceMBBI->mayLoadOrStore() ||
23783 SequenceMBBI->usesCustomInsertionHook() ||
23784 TII.isFrameInstr(*SequenceMBBI) ||
23785 SequenceMBBI->isStackAligningInlineAsm())
23786 break;
23787 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
23788 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
23789 }))
23790 break;
23791 }
23792
23793 const BasicBlock *LLVM_BB = BB->getBasicBlock();
23794 DebugLoc DL = MI.getDebugLoc();
23796
23797 MachineBasicBlock *HeadMBB = BB;
23798 MachineFunction *F = BB->getParent();
23799 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
23800 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
23801
23802 F->insert(I, IfFalseMBB);
23803 F->insert(I, TailMBB);
23804
23805 // Set the call frame size on entry to the new basic blocks.
23806 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
23807 IfFalseMBB->setCallFrameSize(CallFrameSize);
23808 TailMBB->setCallFrameSize(CallFrameSize);
23809
23810 // Transfer debug instructions associated with the selects to TailMBB.
23811 for (MachineInstr *DebugInstr : SelectDebugValues) {
23812 TailMBB->push_back(DebugInstr->removeFromParent());
23813 }
23814
23815 // Move all instructions after the sequence to TailMBB.
23816 TailMBB->splice(TailMBB->end(), HeadMBB,
23817 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
23818 // Update machine-CFG edges by transferring all successors of the current
23819 // block to the new block which will contain the Phi nodes for the selects.
23820 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
23821 // Set the successors for HeadMBB.
23822 HeadMBB->addSuccessor(IfFalseMBB);
23823 HeadMBB->addSuccessor(TailMBB);
23824
23825 // Insert appropriate branch.
23826 if (MI.getOperand(2).isImm())
23827 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
23828 .addReg(LHS)
23829 .addImm(MI.getOperand(2).getImm())
23830 .addMBB(TailMBB);
23831 else
23832 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
23833 .addReg(LHS)
23834 .addReg(RHS)
23835 .addMBB(TailMBB);
23836
23837 // IfFalseMBB just falls through to TailMBB.
23838 IfFalseMBB->addSuccessor(TailMBB);
23839
23840 // Create PHIs for all of the select pseudo-instructions.
23841 auto SelectMBBI = MI.getIterator();
23842 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
23843 auto InsertionPoint = TailMBB->begin();
23844 while (SelectMBBI != SelectEnd) {
23845 auto Next = std::next(SelectMBBI);
23846 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
23847 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
23848 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
23849 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
23850 .addReg(SelectMBBI->getOperand(4).getReg())
23851 .addMBB(HeadMBB)
23852 .addReg(SelectMBBI->getOperand(5).getReg())
23853 .addMBB(IfFalseMBB)
23854 SelectMBBI->eraseFromParent();
23855 }
23856 SelectMBBI = Next;
23857 }
23858
// PHIs were introduced, so the function no longer satisfies the NoPHIs
// property.
23859 F->getProperties().resetNoPHIs();
23860 return TailMBB;
23861}
23862
23863// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
// Asserts (rather than returning null) if either table lookup fails, since
// callers pass combinations that must exist.
// NOTE(review): extraction dropped lines 23866 and 23869 (the declarations of
// the 'Inverse' and 'Masked' lookup results) from this view.
23864 static const RISCV::RISCVMaskedPseudoInfo *
23865 lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
23867 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
23868 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
23870 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
23871 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
23872 return Masked;
23873}
23874
// Custom inserter for PseudoVFROUND_NOEXCEPT_V_*_MASK: rounds by converting
// to integer (CVTXOpc) and back (masked VFCVT_F_X), bracketed by an FFLAGS
// save/restore so no exception flags leak from the conversions.
// NOTE(review): extraction dropped lines 23875-23876 (declaration head),
// 23880/23882 (TII and MRI definitions) and 23892 (TRI initializer tail)
// from this view.
23877 unsigned CVTXOpc) {
23878 DebugLoc DL = MI.getDebugLoc();
23879
23881
23883 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23884
23885 // Save the old value of FFLAGS.
23886 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
23887
23888 assert(MI.getNumOperands() == 7);
23889
23890 // Emit a VFCVT_X_F
23891 const TargetRegisterInfo *TRI =
23893 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
23894 Register Tmp = MRI.createVirtualRegister(RC);
23895 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
23896 .add(MI.getOperand(1))
23897 .add(MI.getOperand(2))
23898 .add(MI.getOperand(3))
23899 .add(MachineOperand::CreateImm(7)) // frm = DYN
23900 .add(MI.getOperand(4))
23901 .add(MI.getOperand(5))
23902 .add(MI.getOperand(6))
23903 .add(MachineOperand::CreateReg(RISCV::FRM,
23904 /*IsDef*/ false,
23905 /*IsImp*/ true));
23906
23907 // Emit a VFCVT_F_X
23908 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
23909 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
23910 // There is no E8 variant for VFCVT_F_X.
23911 assert(Log2SEW >= 4);
23912 unsigned CVTFOpc =
23913 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
23914 ->MaskedPseudo;
23915
23916 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
23917 .add(MI.getOperand(0))
23918 .add(MI.getOperand(1))
23919 .addReg(Tmp)
23920 .add(MI.getOperand(3))
23921 .add(MachineOperand::CreateImm(7)) // frm = DYN
23922 .add(MI.getOperand(4))
23923 .add(MI.getOperand(5))
23924 .add(MI.getOperand(6))
23925 .add(MachineOperand::CreateReg(RISCV::FRM,
23926 /*IsDef*/ false,
23927 /*IsImp*/ true));
23928
23929 // Restore FFLAGS.
23930 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
23931 .addReg(SavedFFLAGS, RegState::Kill);
23932
23933 // Erase the pseudoinstruction.
23934 MI.eraseFromParent();
23935 return BB;
23936}
23937
// Custom inserter for the PseudoFROUND_* family: implements round-to-integer
// in FP registers. If |src| >= the magic max value (all values past which are
// already integral, and NaNs) the source is passed through unchanged;
// otherwise the value is converted FP->int->FP with the requested rounding
// mode and the original sign bit is restored (preserves signed zero).
// NOTE(review): extraction dropped lines 23938 (declaration head), 24012
// (successor/PHI transfer to DoneMBB) and 24032-24033/24046-24047/24052-24053
// (the NoFPExcept-flag propagation on the three MIB uses) from this view.
23939 const RISCVSubtarget &Subtarget) {
23940 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
23941 const TargetRegisterClass *RC;
23942 switch (MI.getOpcode()) {
23943 default:
23944 llvm_unreachable("Unexpected opcode");
23945 case RISCV::PseudoFROUND_H:
23946 CmpOpc = RISCV::FLT_H;
23947 F2IOpc = RISCV::FCVT_W_H;
23948 I2FOpc = RISCV::FCVT_H_W;
23949 FSGNJOpc = RISCV::FSGNJ_H;
23950 FSGNJXOpc = RISCV::FSGNJX_H;
23951 RC = &RISCV::FPR16RegClass;
23952 break;
23953 case RISCV::PseudoFROUND_H_INX:
23954 CmpOpc = RISCV::FLT_H_INX;
23955 F2IOpc = RISCV::FCVT_W_H_INX;
23956 I2FOpc = RISCV::FCVT_H_W_INX;
23957 FSGNJOpc = RISCV::FSGNJ_H_INX;
23958 FSGNJXOpc = RISCV::FSGNJX_H_INX;
23959 RC = &RISCV::GPRF16RegClass;
23960 break;
23961 case RISCV::PseudoFROUND_S:
23962 CmpOpc = RISCV::FLT_S;
23963 F2IOpc = RISCV::FCVT_W_S;
23964 I2FOpc = RISCV::FCVT_S_W;
23965 FSGNJOpc = RISCV::FSGNJ_S;
23966 FSGNJXOpc = RISCV::FSGNJX_S;
23967 RC = &RISCV::FPR32RegClass;
23968 break;
23969 case RISCV::PseudoFROUND_S_INX:
23970 CmpOpc = RISCV::FLT_S_INX;
23971 F2IOpc = RISCV::FCVT_W_S_INX;
23972 I2FOpc = RISCV::FCVT_S_W_INX;
23973 FSGNJOpc = RISCV::FSGNJ_S_INX;
23974 FSGNJXOpc = RISCV::FSGNJX_S_INX;
23975 RC = &RISCV::GPRF32RegClass;
23976 break;
23977 case RISCV::PseudoFROUND_D:
23978 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23979 CmpOpc = RISCV::FLT_D;
23980 F2IOpc = RISCV::FCVT_L_D;
23981 I2FOpc = RISCV::FCVT_D_L;
23982 FSGNJOpc = RISCV::FSGNJ_D;
23983 FSGNJXOpc = RISCV::FSGNJX_D;
23984 RC = &RISCV::FPR64RegClass;
23985 break;
23986 case RISCV::PseudoFROUND_D_INX:
23987 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23988 CmpOpc = RISCV::FLT_D_INX;
23989 F2IOpc = RISCV::FCVT_L_D_INX;
23990 I2FOpc = RISCV::FCVT_D_L_INX;
23991 FSGNJOpc = RISCV::FSGNJ_D_INX;
23992 FSGNJXOpc = RISCV::FSGNJX_D_INX;
23993 RC = &RISCV::GPRRegClass;
23994 break;
23995 }
23996
23997 const BasicBlock *BB = MBB->getBasicBlock();
23998 DebugLoc DL = MI.getDebugLoc();
23999 MachineFunction::iterator I = ++MBB->getIterator();
24000
24001 MachineFunction *F = MBB->getParent();
24002 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
24003 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
24004
24005 F->insert(I, CvtMBB);
24006 F->insert(I, DoneMBB);
24007 // Move all instructions after the sequence to DoneMBB.
24008 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
24009 MBB->end());
24010 // Update machine-CFG edges by transferring all successors of the current
24011 // block to the new block which will contain the Phi nodes for the selects.
24013 // Set the successors for MBB.
24014 MBB->addSuccessor(CvtMBB);
24015 MBB->addSuccessor(DoneMBB);
24016
24017 Register DstReg = MI.getOperand(0).getReg();
24018 Register SrcReg = MI.getOperand(1).getReg();
24019 Register MaxReg = MI.getOperand(2).getReg();
24020 int64_t FRM = MI.getOperand(3).getImm();
24021
24022 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
24023 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
24024
// FSGNJX of a value with itself computes |src|.
24025 Register FabsReg = MRI.createVirtualRegister(RC);
24026 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
24027
24028 // Compare the FP value to the max value.
24029 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
24030 auto MIB =
24031 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
24034
24035 // Insert branch.
24036 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
24037 .addReg(CmpReg)
24038 .addReg(RISCV::X0)
24039 .addMBB(DoneMBB);
24040
24041 CvtMBB->addSuccessor(DoneMBB);
24042
24043 // Convert to integer.
24044 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
24045 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
24048
24049 // Convert back to FP.
24050 Register I2FReg = MRI.createVirtualRegister(RC);
24051 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
24054
24055 // Restore the sign bit.
24056 Register CvtReg = MRI.createVirtualRegister(RC);
24057 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
24058
24059 // Merge the results.
24060 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
24061 .addReg(SrcReg)
24062 .addMBB(MBB)
24063 .addReg(CvtReg)
24064 .addMBB(CvtMBB);
24065
24066 MI.eraseFromParent();
24067 return DoneMBB;
24068}
24069
// Dispatcher for all RISC-V pseudos flagged usesCustomInserter: routes each
// opcode to its dedicated expansion helper (counter reads, selects, F64
// pack/unpack, quiet FP compares, masked VFROUND, FROUND, probed alloca) and
// handles STACKMAP/PATCHPOINT/STATEPOINT via the generic emitPatchPoint.
// NOTE(review): extraction dropped lines 24070-24071 (the
// RISCVTargetLowering::EmitInstrWithCustomInserter declaration head) and
// 24161 (the MachineOperand::CreateReg( call head for the X1 implicit def)
// from this view.
24072 MachineBasicBlock *BB) const {
24073 switch (MI.getOpcode()) {
24074 default:
24075 llvm_unreachable("Unexpected instr type to insert");
24076 case RISCV::ReadCounterWide:
24077 assert(!Subtarget.is64Bit() &&
24078 "ReadCounterWide is only to be used on riscv32");
24079 return emitReadCounterWidePseudo(MI, BB);
24080 case RISCV::Select_GPR_Using_CC_GPR:
24081 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
24082 case RISCV::Select_GPR_Using_CC_SImm5_CV:
24083 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
24084 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
24085 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
24086 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
24087 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
24088 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
24089 case RISCV::Select_FPR16_Using_CC_GPR:
24090 case RISCV::Select_FPR16INX_Using_CC_GPR:
24091 case RISCV::Select_FPR32_Using_CC_GPR:
24092 case RISCV::Select_FPR32INX_Using_CC_GPR:
24093 case RISCV::Select_FPR64_Using_CC_GPR:
24094 case RISCV::Select_FPR64INX_Using_CC_GPR:
24095 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
24096 return emitSelectPseudo(MI, BB, Subtarget);
24097 case RISCV::BuildPairF64Pseudo:
24098 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
24099 case RISCV::SplitF64Pseudo:
24100 return emitSplitF64Pseudo(MI, BB, Subtarget);
24101 case RISCV::PseudoQuietFLE_H:
24102 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
24103 case RISCV::PseudoQuietFLE_H_INX:
24104 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
24105 case RISCV::PseudoQuietFLT_H:
24106 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
24107 case RISCV::PseudoQuietFLT_H_INX:
24108 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
24109 case RISCV::PseudoQuietFLE_S:
24110 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
24111 case RISCV::PseudoQuietFLE_S_INX:
24112 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
24113 case RISCV::PseudoQuietFLT_S:
24114 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
24115 case RISCV::PseudoQuietFLT_S_INX:
24116 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
24117 case RISCV::PseudoQuietFLE_D:
24118 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
24119 case RISCV::PseudoQuietFLE_D_INX:
24120 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
24121 case RISCV::PseudoQuietFLE_D_IN32X:
24122 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
24123 Subtarget);
24124 case RISCV::PseudoQuietFLT_D:
24125 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
24126 case RISCV::PseudoQuietFLT_D_INX:
24127 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
24128 case RISCV::PseudoQuietFLT_D_IN32X:
24129 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
24130 Subtarget);
24131
24132 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
24133 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
24134 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
24135 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
24136 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
24137 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
24138 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
24139 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
24140 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
24141 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
24142 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
24143 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
24144 case RISCV::PseudoFROUND_H:
24145 case RISCV::PseudoFROUND_H_INX:
24146 case RISCV::PseudoFROUND_S:
24147 case RISCV::PseudoFROUND_S_INX:
24148 case RISCV::PseudoFROUND_D:
24149 case RISCV::PseudoFROUND_D_INX:
24150 case RISCV::PseudoFROUND_D_IN32X:
24151 return emitFROUND(MI, BB, Subtarget);
24152 case RISCV::PROBED_STACKALLOC_DYN:
24153 return emitDynamicProbedAlloc(MI, BB);
24154 case TargetOpcode::STATEPOINT:
24155 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
24156 // while jal call instruction (where statepoint will be lowered at the end)
24157 // has implicit def. This def is early-clobber as it will be set at
24158 // the moment of the call and earlier than any use is read.
24159 // Add this implicit dead def here as a workaround.
24160 MI.addOperand(*MI.getMF(),
24162 RISCV::X1, /*isDef*/ true,
24163 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
24164 /*isUndef*/ false, /*isEarlyClobber*/ true));
24165 [[fallthrough]];
24166 case TargetOpcode::STACKMAP:
24167 case TargetOpcode::PATCHPOINT:
24168 if (!Subtarget.is64Bit())
24169 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
24170 "supported on 64-bit targets");
24171 return emitPatchPoint(MI, BB);
24172 }
24173}
24174
// Post-isel hook: models the FRM control register as data. A def of FRM is
// forced live (non-dead), and any instruction whose rounding-mode operand is
// DYN gets an implicit FRM read so the scheduler cannot move it across FRM
// writes.
// NOTE(review): extraction dropped line 24175 (the
// RISCVTargetLowering::AdjustInstrPostInstrSelection declaration head) from
// this view.
24176 SDNode *Node) const {
24177 // If instruction defines FRM operand, conservatively set it as non-dead to
24178 // express data dependency with FRM users and prevent incorrect instruction
24179 // reordering.
24180 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
24181 FRMDef->setIsDead(false);
24182 return;
24183 }
24184 // Add FRM dependency to any instructions with dynamic rounding mode.
24185 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
24186 if (Idx < 0) {
24187 // Vector pseudos have FRM index indicated by TSFlags.
24188 Idx = RISCVII::getFRMOpNum(MI.getDesc());
24189 if (Idx < 0)
24190 return;
24191 }
24192 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
24193 return;
24194 // If the instruction already reads FRM, don't add another read.
24195 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
24196 return;
24197 MI.addOperand(
24198 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
24199}
24200
24201void RISCVTargetLowering::analyzeInputArgs(
24202 MachineFunction &MF, CCState &CCInfo,
24203 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
24204 RISCVCCAssignFn Fn) const {
24205 for (const auto &[Idx, In] : enumerate(Ins)) {
24206 MVT ArgVT = In.VT;
24207 ISD::ArgFlagsTy ArgFlags = In.Flags;
24208
24209 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
24210 In.OrigTy)) {
24211 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
24212 << ArgVT << '\n');
24213 llvm_unreachable(nullptr);
24214 }
24215 }
24216}
24217
24218void RISCVTargetLowering::analyzeOutputArgs(
24219 MachineFunction &MF, CCState &CCInfo,
24220 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
24221 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
24222 for (const auto &[Idx, Out] : enumerate(Outs)) {
24223 MVT ArgVT = Out.VT;
24224 ISD::ArgFlagsTy ArgFlags = Out.Flags;
24225
24226 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
24227 Out.OrigTy)) {
24228 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
24229 << ArgVT << "\n");
24230 llvm_unreachable(nullptr);
24231 }
24232 }
24233}
24234
24235 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
24236 // values.
// Handles the custom cases (f16/bf16 passed in an integer register, f32
// passed in the low half of an RV64 i64, fixed-length vectors passed as
// scalable containers) and the standard Full/BCvt location conversions.
// NOTE(review): extraction dropped lines 24237 (the static
// convertLocVTToValVT(SelectionDAG &, SDValue, ...) declaration head) and
// 24246 (the fixed-vs-scalable vector condition guarding the
// convertFromScalableVector call) from this view.
24238 const CCValAssign &VA, const SDLoc &DL,
24239 const RISCVSubtarget &Subtarget) {
24240 if (VA.needsCustom()) {
24241 if (VA.getLocVT().isInteger() &&
24242 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
24243 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
24244 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
24245 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
24247 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
24248 llvm_unreachable("Unexpected Custom handling.");
24249 }
24250
24251 switch (VA.getLocInfo()) {
24252 default:
24253 llvm_unreachable("Unexpected CCValAssign::LocInfo");
24254 case CCValAssign::Full:
24255 break;
24256 case CCValAssign::BCvt:
24257 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
24258 break;
24259 }
24260 return Val;
24261}
24262
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// Copies an incoming argument out of its physical register into a fresh
// virtual register and converts it back to its value type.
                               const CCValAssign &VA, const SDLoc &DL,
                               const ISD::InputArg &In,
                               const RISCVTargetLowering &TLI) {
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  // Make the physreg live-in and read it through a vreg copy.
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  // NOTE(review): guard not visible in this excerpt — presumably Indirect
  // locations return the raw pointer value unconverted; confirm upstream.
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
24297
// Convert Val from its natural value type into the register-level
// representation (loc VT) chosen by the calling convention. Inverse of
// convertLocVTToValVT.
                               const CCValAssign &VA, const SDLoc &DL,
                               const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  if (VA.needsCustom()) {
    // f16/bf16 passed in an integer register: move the FP bits into a GPR,
    // any-extending the upper bits.
    if (LocVT.isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
    // f32 passed in an i64 register on RV64.
    if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    // Fixed-length vector passed in a scalable vector container register.
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      return convertToScalableVector(LocVT, Val, DAG, Subtarget);
    llvm_unreachable("Unexpected Custom handling.");
  }

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // Same bit pattern, different type (e.g. f32 -> i32).
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
24325
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// Loads an incoming stack-passed argument from its fixed frame slot and
// converts it back to its value type.
                               const CCValAssign &VA, const SDLoc &DL,
                               const RISCVTargetLowering &TLI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  // Immutable fixed object: caller-owned incoming-argument stack slot.
  int FI = MFI.CreateFixedObject(LocVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val = DAG.getLoad(
      LocVT, DL, Chain, FIN,

  // NOTE(review): guard not visible in this excerpt — presumably Indirect
  // locations return the loaded pointer unconverted; confirm upstream.
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
24347
// Reassemble an incoming f64 argument that the RV32 soft-float ABI split
// into two 32-bit halves: the low half is always in a GPR (VA); the high
// half (HiVA) is either in a second GPR or spilled to the stack.
                                      const CCValAssign &VA,
                                      const CCValAssign &HiVA,
                                      const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // Low half: copy out of the incoming GPR through a fresh vreg.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }

  // For big-endian, swap the order of Lo and Hi when building the pair.
  const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
  if (!Subtarget.isLittleEndian())
    std::swap(Lo, Hi);

  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
24385
// Transform physical registers into virtual registers.
// Lowers the incoming arguments of the current function: validates the
// calling convention and the "interrupt" attribute, assigns a location to
// each formal argument, materializes each one (register copy, stack load,
// or split-f64 reassembly), and sets up the varargs save area when needed.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {


  switch (CallConv) {
  default:
    reportFatalUsageError("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::GRAAL:
#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
    CC_VLS_CASE(32)
    CC_VLS_CASE(64)
    CC_VLS_CASE(128)
    CC_VLS_CASE(256)
    CC_VLS_CASE(512)
    CC_VLS_CASE(1024)
    CC_VLS_CASE(2048)
    CC_VLS_CASE(4096)
    CC_VLS_CASE(8192)
    CC_VLS_CASE(16384)
    CC_VLS_CASE(32768)
    CC_VLS_CASE(65536)
#undef CC_VLS_CASE
    break;
  case CallingConv::GHC:
    // GHC requires full F/D (or Zfinx/Zdinx) support and is incompatible
    // with the reduced RVE register file.
    if (Subtarget.hasStdExtE())
      reportFatalUsageError("GHC calling convention is not supported on RVE!");
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
                            "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers take no arguments and only accept a fixed set of
    // kind strings; vendor-specific kinds additionally require their
    // extension to be enabled.
    if (!Func.arg_empty())
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    constexpr StringLiteral SupportedInterruptKinds[] = {
        "machine",
        "supervisor",
        "rnmi",
        "qci-nest",
        "qci-nonest",
        "SiFive-CLIC-preemptible",
        "SiFive-CLIC-stack-swap",
        "SiFive-CLIC-preemptible-stack-swap",
    };
    if (!llvm::is_contained(SupportedInterruptKinds, Kind))
        "Function interrupt attribute argument not supported!");

    if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
        "'qci-*' interrupt kinds require Xqciint extension");

    if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
        "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");

    if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
      reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
    const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
    if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
      reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
                            "have a frame pointer");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                         : CC_RISCV);

  // InsIdx tracks Ins entries; i tracks ArgLocs entries. They diverge when
  // one Ins entry consumes two locations (split f64) or when extra parts of
  // an indirect argument are consumed inside the loop body.
  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL, *this);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        // Scalable-vector part offsets are in vscale units.
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
    // Remember incoming byval arguments so tail calls can reuse/compare
    // them (see isEligibleForTailCallOptimization).
    if (Ins[InsIdx].Flags.isByVal())
      RVFI->addIncomingByValArgs(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Size of the vararg save area. For now, the varargs save area is either
    // zero or large enough to hold a0-a7.
    int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
    int FI;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (VarArgsSaveSize == 0) {
      int VaArgOffset = CCInfo.getStackSize();
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    } else {
      int VaArgOffset = -VarArgsSaveSize;
      FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);

      // If saving an odd number of registers then create an extra stack slot to
      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
      // offsets to even-numbered registered remain 2*XLEN-aligned.
      if (Idx % 2) {
            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
        VarArgsSaveSize += XLenInBytes;
      }

      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Copy the integer registers that may have been used for passing varargs
      // to the vararg save area.
      for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        const Register Reg = RegInfo.createVirtualRegister(RC);
        RegInfo.addLiveIn(ArgRegs[I], Reg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
        SDValue Store = DAG.getStore(
            Chain, DL, ArgValue, FIN,
            MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
        OutChains.push_back(Store);
        FIN =
            DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
      }
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    RVFI->setVarArgsFrameIndex(FI);
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  RVFI->setArgumentStackSize(CCInfo.getStackSize());

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
24594
24595/// isEligibleForTailCallOptimization - Check whether the call is eligible
24596/// for tail call optimization.
24597/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
24598bool RISCVTargetLowering::isEligibleForTailCallOptimization(
24599 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
24600 const SmallVector<CCValAssign, 16> &ArgLocs) const {
24601
24602 auto CalleeCC = CLI.CallConv;
24603 auto &Outs = CLI.Outs;
24604 auto &Caller = MF.getFunction();
24605 auto CallerCC = Caller.getCallingConv();
24606 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
24607
24608 // Exception-handling functions need a special set of instructions to
24609 // indicate a return to the hardware. Tail-calling another function would
24610 // probably break this.
24611 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
24612 // should be expanded as new function attributes are introduced.
24613 if (Caller.hasFnAttribute("interrupt"))
24614 return false;
24615
24616 // If the stack arguments for this call do not fit into our own save area then
24617 // the call cannot be made tail.
24618 if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
24619 return false;
24620
24621 // Do not tail call opt if either caller or callee uses struct return
24622 // semantics.
24623 auto IsCallerStructRet = Caller.hasStructRetAttr();
24624 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
24625 if (IsCallerStructRet != IsCalleeStructRet)
24626 return false;
24627
24628 // Do not tail call opt if caller's and callee's byval arguments do not match.
24629 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
24630 if (!Outs[i].Flags.isByVal())
24631 continue;
24632 if (j++ >= RVFI->getIncomingByValArgsSize())
24633 return false;
24634 if (RVFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
24635 return false;
24636 }
24637
24638 // The callee has to preserve all registers the caller needs to preserve.
24639 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
24640 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
24641 if (CalleeCC != CallerCC) {
24642 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
24643 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
24644 return false;
24645 }
24646
24647 // If the callee takes no arguments then go on to check the results of the
24648 // call.
24649 const MachineRegisterInfo &MRI = MF.getRegInfo();
24650 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
24651 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
24652 return false;
24653
24654 return true;
24655}
24656
  // Preferred alignment of VT's corresponding IR type, per the module's
  // data layout.
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
24661
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
// Handles tail-call eligibility, byval copies, RV32 soft-float f64
// splitting, indirect (split/by-reference) arguments, register/stack
// argument placement, and copying the results back out of physregs.
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  const CallBase *CB = CLI.CB;


  // Set type id for call site info.
  setTypeIdForCallsiteInfo(CB, MF, CSInfo);

  // Analyze the operands of the call, assigning locations to each operand.
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC) {
    if (Subtarget.hasStdExtE())
      reportFatalUsageError("GHC calling convention is not supported on RVE!");
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  } else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                          : CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    reportFatalInternalError("failed to perform tail call elimination on a "
                             "call site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
    SDValue Dst;

    if (IsTailCall) {
      // Tail call: reuse the caller's own incoming byval slot ('j' is the
      // byval ordinal) instead of allocating a new stack object.
      SDValue CallerArg = RVFI->getIncomingByValArgs(j++);
      Dst = CallerArg;
    } else {
      int FI =
          MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
      Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    }
    if (Dst) {
      Chain =
          DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
                        /*IsVolatile=*/false,
                        /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
      ByValArgs.push_back(Dst);
    }
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // During a tail call, stores to the argument area must happen after all of
  // the function's incoming arguments have been loaded because they may alias.
  // This is done by folding in a TokenFactor from LowerFormalArguments, but
  // there's no point in doing so repeatedly so this tracks whether that's
  // happened yet.
  bool AfterFormalArgLoads = false;

  // Copy argument values to their designated locations.
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // 'j' indexes the ByValArgs copies created above; 'OutIdx' tracks Outs
  // entries while 'i' tracks ArgLocs entries (they diverge on split args).
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      // For big-endian, swap the order of Lo and Hi when passing.
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(DAG.getStore(
            Chain, DL, Hi, Address,
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        // Scalable-vector part offsets are in vscale units.
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
      }
      // From here on, pass the spill slot's address instead of the value.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal()) {
      if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
                          isa<ExternalSymbolSDNode>(ArgValue) ||
                          isa<FrameIndexSDNode>(ArgValue)))
        ArgValue = ByValArgs[j++];
    }

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));

      const TargetOptions &Options = DAG.getTarget().Options;
      if (Options.EmitCallSiteInfo)
        CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      SDValue DstAddr;
      MachinePointerInfo DstInfo;
      int32_t Offset = VA.getLocMemOffset();

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);

      if (IsTailCall) {
        // Tail calls store into the caller's own incoming-argument area.
        unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
        DstAddr = DAG.getFrameIndex(FI, PtrVT);
        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
        if (!AfterFormalArgLoads) {
          Chain = DAG.getStackArgumentTokenFactor(Chain);
          AfterFormalArgLoads = true;
        }
      } else {
        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
        DstInfo = MachinePointerInfo::getStack(MF, Offset);
      }

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
      MF.getFunction(),
      "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  bool CalleeIsLargeExternalSymbol = false;
    if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
      Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
    else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
      CalleeIsLargeExternalSymbol = true;
    }
  } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();
    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
  }

  // The first call operand is the chain and the second is the target address.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
         "Unexpected CFI type for a direct call");

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // Use software guarded branch for large code model non-indirect calls
  // Tail call to external symbol will have a null CLI.CB and we need another
  // way to determine the callsite type
  bool NeedSWGuarded = false;
      Subtarget.hasStdExtZicfilp() &&
      ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
    NeedSWGuarded = true;

  if (IsTailCall) {
    unsigned CallOpc =
        NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
    SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
    if (CLI.CFIType)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
  if (CLI.CFIType)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // f64 returned in a pair of i32 regs on RV32 soft-float: read the second
    // half and rebuild the pair.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);

      // For big-endian, swap the order when building the pair.
      SDValue Lo = RetValue;
      SDValue Hi = RetValue2;
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);

      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
25039
// Returns true if every return value can be assigned a location by the
// RISC-V return convention (CC_RISCV with IsRet=true); false tells the
// generic lowering to demote the return to an sret argument instead.
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    // A nonzero result from the CC callback means "cannot assign".
    if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
                 /*IsRet=*/true, Outs[i].OrigTy))
      return false;
  }
  return true;
}
25056
25057SDValue
// Lower the outgoing return value(s) of the current function: assign each
// value to its ABI return location, copy it into the physical register(s),
// and emit the target return node (RET_GLUE or an interrupt-specific
// variant). NOTE(review): part of the signature, the RVLocs declaration and
// the bodies of the reserved-register diagnostics are elided in this
// extraction; the numbered lines are preserved byte-for-byte.
25059                                 bool IsVarArg,
25061                                 const SmallVectorImpl<SDValue> &OutVals,
25062                                 const SDLoc &DL, SelectionDAG &DAG) const {
25064
25065  // Stores the assignment of the return value to a location.
25067
25068  // Info about the registers and stack slot.
25069  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
25070                 *DAG.getContext());
25071
25072  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
25073                    nullptr, CC_RISCV);
25074
25075  if (CallConv == CallingConv::GHC && !RVLocs.empty())
25076    reportFatalUsageError("GHC functions return void only");
25077
25078  SDValue Glue;
25079  SmallVector<SDValue, 4> RetOps(1, Chain);
25080
25081  // Copy the result values into the output registers.
25082  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
25083    SDValue Val = OutVals[OutIdx];
25084    CCValAssign &VA = RVLocs[i];
25085    assert(VA.isRegLoc() && "Can only return in registers!");
25086
25087    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
25088      // Handle returning f64 on RV32D with a soft float ABI.
25089      assert(VA.isRegLoc() && "Expected return via registers");
25090      assert(VA.needsCustom());
25091      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
25092                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
25093      SDValue Lo = SplitF64.getValue(0);
25094      SDValue Hi = SplitF64.getValue(1);
25095
25096      // For big-endian, swap the order of Lo and Hi when returning.
25097      if (!Subtarget.isLittleEndian())
25098        std::swap(Lo, Hi);
25099
// The f64 occupies a consecutive register pair; the second CCValAssign is
// consumed here, hence the extra ++i.
25100      Register RegLo = VA.getLocReg();
25101      Register RegHi = RVLocs[++i].getLocReg();
25102
25103      if (Subtarget.isRegisterReservedByUser(RegLo) ||
25104          Subtarget.isRegisterReservedByUser(RegHi))
25106            MF.getFunction(),
25107            "Return value register required, but has been reserved."});
25108
25109      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
25110      Glue = Chain.getValue(1);
25111      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
25112      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
25113      Glue = Chain.getValue(1);
25114      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
25115    } else {
25116      // Handle a 'normal' return.
25117      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
25118      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
25119
25120      if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
25122            MF.getFunction(),
25123            "Return value register required, but has been reserved."});
25124
25125      // Guarantee that all emitted copies are stuck together.
25126      Glue = Chain.getValue(1);
25127      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
25128    }
25129  }
25130
25131  RetOps[0] = Chain; // Update chain.
25132
25133  // Add the glue node if we have it.
25134  if (Glue.getNode()) {
25135    RetOps.push_back(Glue);
25136  }
25137
// Scalable-vector returns take an elided early-exit path here (line 25140/
// 25141 dropped by the extraction) — presumably a fatal error or special
// lowering; TODO confirm against upstream.
25138  if (any_of(RVLocs,
25139             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
25141
25142  unsigned RetOpc = RISCVISD::RET_GLUE;
25143  // Interrupt service routines use different return instructions.
25144  const Function &Func = DAG.getMachineFunction().getFunction();
25145  if (Func.hasFnAttribute("interrupt")) {
25146    if (!Func.getReturnType()->isVoidTy())
25148          "Functions with the interrupt attribute must have void return type!");
25149
25151    StringRef Kind =
25152        MF.getFunction().getFnAttribute("interrupt").getValueAsString();
25153
25154    if (Kind == "supervisor")
25155      RetOpc = RISCVISD::SRET_GLUE;
25156    else if (Kind == "rnmi") {
25157      assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
25158             "Need Smrnmi extension for rnmi");
25159      RetOpc = RISCVISD::MNRET_GLUE;
25160    } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
25161      assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
25162             "Need Xqciint for qci-(no)nest");
25163      RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
25164    } else
25165      RetOpc = RISCVISD::MRET_GLUE;
25166  }
25167
25168  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
25169}
25170
25171void RISCVTargetLowering::validateCCReservedRegs(
25172 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
25173 MachineFunction &MF) const {
25174 const Function &F = MF.getFunction();
25175
25176 if (llvm::any_of(Regs, [this](auto Reg) {
25177 return Subtarget.isRegisterReservedByUser(Reg.first);
25178 }))
25179 F.getContext().diagnose(DiagnosticInfoUnsupported{
25180 F, "Argument register required, but has been reserved."});
25181}
25182
25183// Check if the result of the node is only used as a return value, as
25184// otherwise we can't perform a tail-call.
// NOTE(review): the signature line (presumably RISCVTargetLowering::
// isUsedByReturnOnly(SDNode *N, SDValue &Chain)) is elided in this
// extraction; the visible body is preserved byte-for-byte.
25186  if (N->getNumValues() != 1)
25187    return false;
25188  if (!N->hasNUsesOfValue(1, 0))
25189    return false;
25190
25191  SDNode *Copy = *N->user_begin();
25192
// Look through a bitcast of the value: a bitcast feeding the return should
// not block tail-call formation, so recurse on its single user.
25193  if (Copy->getOpcode() == ISD::BITCAST) {
25194    return isUsedByReturnOnly(Copy, Chain);
25195  }
25196
25197  // TODO: Handle additional opcodes in order to support tail-calling libcalls
25198  // with soft float ABIs.
25199  if (Copy->getOpcode() != ISD::CopyToReg) {
25200    return false;
25201  }
25202
25203  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
25204  // isn't safe to perform a tail call.
25205  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
25206    return false;
25207
25208  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
25209  bool HasRet = false;
25210  for (SDNode *Node : Copy->users()) {
25211    if (Node->getOpcode() != RISCVISD::RET_GLUE)
25212      return false;
25213    HasRet = true;
25214  }
25215  if (!HasRet)
25216    return false;
25217
// On success, report back the chain the tail call should continue from
// (the CopyToReg's incoming chain).
25218  Chain = Copy->getOperand(0);
25219  return true;
25220}
25221
25223 return CI->isTailCall();
25224}
25225
25226/// getConstraintType - Given a constraint letter, return the type of
25227/// constraint it is for this target.
// NOTE(review): the function signature lines are elided in this extraction;
// the visible body is preserved byte-for-byte.
25230  if (Constraint.size() == 1) {
25231    switch (Constraint[0]) {
25232    default:
25233      break;
25234    case 'f':
25235    case 'R':
25236      return C_RegisterClass;
25237    case 'I':
25238    case 'J':
25239    case 'K':
25240      return C_Immediate;
25241    case 'A':
25242      return C_Memory;
25243    case 's':
25244    case 'S': // A symbolic address
25245      return C_Other;
25246    }
25247  } else {
// Multi-character constraints: RVV register classes ("vr"/"vd"/"vm") and
// compressed-register variants ("cr"/"cR"/"cf").
25248    if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
25249      return C_RegisterClass;
25250    if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
25251      return C_RegisterClass;
25252  }
25253  return TargetLowering::getConstraintType(Constraint);
25254}
25255
25256std::pair<unsigned, const TargetRegisterClass *>
// Resolve an inline-asm register constraint (single-letter, RVV multi-letter,
// or an explicit "{reg}" name, including ABI alias names) to a physical
// register and/or register class for the given value type.
// NOTE(review): the first signature line is elided in this extraction, and
// the "ZZZ_"-prefixed register class names below do not match upstream
// RISC-V register class naming conventions — verify against
// RISCVRegisterInfo.td.
25258                                                  StringRef Constraint,
25259                                                  MVT VT) const {
25260  // First, see if this is a constraint that directly corresponds to a RISC-V
25261  // register class.
25262  if (Constraint.size() == 1) {
25263    switch (Constraint[0]) {
25264    case 'r':
25265      // TODO: Support fixed vectors up to XLen for P extension?
25266      if (VT.isVector())
25267        break;
25268      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
25269        return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
25270      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
25271        return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
25272      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25273        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25274      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
25275    case 'f':
// 'f': an FP register of the widest class the subtarget supports for VT;
// falls back to the GPR-based Zfinx-family classes where applicable.
25276      if (VT == MVT::f16) {
25277        if (Subtarget.hasStdExtZfhmin())
25278          return std::make_pair(0U, &RISCV::FPR16RegClass);
25279        if (Subtarget.hasStdExtZhinxmin())
25280          return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
25281      } else if (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
25282        return std::make_pair(0U, &RISCV::FPR16RegClass);
25283      } else if (VT == MVT::f32) {
25284        if (Subtarget.hasStdExtF())
25285          return std::make_pair(0U, &RISCV::FPR32RegClass);
25286        if (Subtarget.hasStdExtZfinx())
25287          return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
25288      } else if (VT == MVT::f64) {
25289        if (Subtarget.hasStdExtD())
25290          return std::make_pair(0U, &RISCV::FPR64RegClass);
25291        if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25292          return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25293        if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
25294          return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
25295      }
25296      break;
25297    case 'R':
// 'R': an even/odd GPR pair (2*XLen-wide values).
25298      if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
25299          (VT == MVT::i128 && Subtarget.is64Bit()))
25300        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25301      break;
25302    default:
25303      break;
25304    }
25305  } else if (Constraint == "vr") {
25306    // Check VM and fractional LMUL first so that those types will use that
25307    // class instead of VR.
25308    for (const auto *RC :
25309         {&RISCV::ZZZ_VMRegClass, &RISCV::ZZZ_VRMF8RegClass,
25310          &RISCV::ZZZ_VRMF4RegClass, &RISCV::ZZZ_VRMF2RegClass,
25311          &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
25312          &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
25313          &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
25314          &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
25315          &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
25316          &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
25317          &RISCV::VRN2M4RegClass}) {
25318      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
25319        return std::make_pair(0U, RC);
25320
// Fixed-length vectors are matched through their scalable container type.
25321      if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25322        MVT ContainerVT = getContainerForFixedLengthVector(VT);
25323        if (TRI->isTypeLegalForClass(*RC, ContainerVT))
25324          return std::make_pair(0U, RC);
25325      }
25326    }
25327  } else if (Constraint == "vd") {
25328    // Check VMNoV0 and fractional LMUL first so that those types will use that
25329    // class instead of VRNoV0.
25330    for (const auto *RC :
25331         {&RISCV::ZZZ_VMNoV0RegClass, &RISCV::ZZZ_VRMF8NoV0RegClass,
25332          &RISCV::ZZZ_VRMF4NoV0RegClass, &RISCV::ZZZ_VRMF2NoV0RegClass,
25333          &RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
25334          &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
25335          &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
25336          &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
25337          &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
25338          &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
25339          &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
25340          &RISCV::VRN2M4NoV0RegClass}) {
25341      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
25342        return std::make_pair(0U, RC);
25343
25344      if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25345        MVT ContainerVT = getContainerForFixedLengthVector(VT);
25346        if (TRI->isTypeLegalForClass(*RC, ContainerVT))
25347          return std::make_pair(0U, RC);
25348      }
25349    }
25350  } else if (Constraint == "vm") {
25351    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
25352      return std::make_pair(0U, &RISCV::VMV0RegClass);
25353
25354    if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25355      MVT ContainerVT = getContainerForFixedLengthVector(VT);
25356      // VT here might be coerced to vector with i8 elements, so we need to
25357      // check if this is a M1 register here instead of checking VMV0RegClass.
25358      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
25359        return std::make_pair(0U, &RISCV::VMV0RegClass);
25360    }
25361  } else if (Constraint == "cr") {
// "c"-prefixed constraints restrict to registers addressable by the
// compressed (RVC) instruction encodings.
25362    if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
25363      return std::make_pair(0U, &RISCV::GPRF16CRegClass);
25364    if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
25365      return std::make_pair(0U, &RISCV::GPRF32CRegClass);
25366    if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25367      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25368    if (!VT.isVector())
25369      return std::make_pair(0U, &RISCV::GPRCRegClass);
25370  } else if (Constraint == "cR") {
25371    if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
25372        (VT == MVT::i128 && Subtarget.is64Bit()))
25373      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25374  } else if (Constraint == "cf") {
25375    if (VT == MVT::f16) {
25376      if (Subtarget.hasStdExtZfhmin())
25377        return std::make_pair(0U, &RISCV::FPR16CRegClass);
25378      if (Subtarget.hasStdExtZhinxmin())
25379        return std::make_pair(0U, &RISCV::GPRF16CRegClass);
25380    } else if (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
25381      return std::make_pair(0U, &RISCV::FPR16CRegClass);
25382    } else if (VT == MVT::f32) {
25383      if (Subtarget.hasStdExtF())
25384        return std::make_pair(0U, &RISCV::FPR32CRegClass);
25385      if (Subtarget.hasStdExtZfinx())
25386        return std::make_pair(0U, &RISCV::GPRF32CRegClass);
25387    } else if (VT == MVT::f64) {
25388      if (Subtarget.hasStdExtD())
25389        return std::make_pair(0U, &RISCV::FPR64CRegClass);
25390      if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25391        return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25392      if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
25393        return std::make_pair(0U, &RISCV::GPRCRegClass);
25394    }
25395  }
25396
25397  // Clang will correctly decode the usage of register name aliases into their
25398  // official names. However, other frontends like `rustc` do not. This allows
25399  // users of these frontends to use the ABI names for registers in LLVM-style
25400  // register constraints.
25401  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
25402                               .Case("{zero}", RISCV::X0)
25403                               .Case("{ra}", RISCV::X1)
25404                               .Case("{sp}", RISCV::X2)
25405                               .Case("{gp}", RISCV::X3)
25406                               .Case("{tp}", RISCV::X4)
25407                               .Case("{t0}", RISCV::X5)
25408                               .Case("{t1}", RISCV::X6)
25409                               .Case("{t2}", RISCV::X7)
25410                               .Cases({"{s0}", "{fp}"}, RISCV::X8)
25411                               .Case("{s1}", RISCV::X9)
25412                               .Case("{a0}", RISCV::X10)
25413                               .Case("{a1}", RISCV::X11)
25414                               .Case("{a2}", RISCV::X12)
25415                               .Case("{a3}", RISCV::X13)
25416                               .Case("{a4}", RISCV::X14)
25417                               .Case("{a5}", RISCV::X15)
25418                               .Case("{a6}", RISCV::X16)
25419                               .Case("{a7}", RISCV::X17)
25420                               .Case("{s2}", RISCV::X18)
25421                               .Case("{s3}", RISCV::X19)
25422                               .Case("{s4}", RISCV::X20)
25423                               .Case("{s5}", RISCV::X21)
25424                               .Case("{s6}", RISCV::X22)
25425                               .Case("{s7}", RISCV::X23)
25426                               .Case("{s8}", RISCV::X24)
25427                               .Case("{s9}", RISCV::X25)
25428                               .Case("{s10}", RISCV::X26)
25429                               .Case("{s11}", RISCV::X27)
25430                               .Case("{t3}", RISCV::X28)
25431                               .Case("{t4}", RISCV::X29)
25432                               .Case("{t5}", RISCV::X30)
25433                               .Case("{t6}", RISCV::X31)
25434                               .Default(RISCV::NoRegister);
25435  if (XRegFromAlias != RISCV::NoRegister)
25436    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
25437
25438  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
25439  // TableGen record rather than the AsmName to choose registers for InlineAsm
25440  // constraints, plus we want to match those names to the widest floating point
25441  // register type available, manually select floating point registers here.
25442  //
25443  // The second case is the ABI name of the register, so that frontends can also
25444  // use the ABI names in register constraint lists.
25445  if (Subtarget.hasStdExtF()) {
25446    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
25447                        .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
25448                        .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
25449                        .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
25450                        .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
25451                        .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
25452                        .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
25453                        .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
25454                        .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
25455                        .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
25456                        .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
25457                        .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
25458                        .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
25459                        .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
25460                        .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
25461                        .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
25462                        .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
25463                        .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
25464                        .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
25465                        .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
25466                        .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
25467                        .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
25468                        .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
25469                        .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
25470                        .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
25471                        .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
25472                        .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
25473                        .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
25474                        .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
25475                        .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
25476                        .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
25477                        .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
25478                        .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
25479                        .Default(RISCV::NoRegister);
25480    if (FReg != RISCV::NoRegister) {
25481      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
// Translate the matched F-register to the widest subregister view that VT
// requires (F64/F32/F16 share a register number per f-reg).
25482      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
25483        unsigned RegNo = FReg - RISCV::F0_F;
25484        unsigned DReg = RISCV::F0_D + RegNo;
25485        return std::make_pair(DReg, &RISCV::FPR64RegClass);
25486      }
25487      if (VT == MVT::f32 || VT == MVT::Other)
25488        return std::make_pair(FReg, &RISCV::FPR32RegClass);
25489      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
25490        unsigned RegNo = FReg - RISCV::F0_F;
25491        unsigned HReg = RISCV::F0_H + RegNo;
25492        return std::make_pair(HReg, &RISCV::FPR16RegClass);
25493      }
25494    }
25495  }
25496
25497  if (Subtarget.hasVInstructions()) {
25498    Register VReg = StringSwitch<Register>(Constraint.lower())
25499                        .Case("{v0}", RISCV::V0)
25500                        .Case("{v1}", RISCV::V1)
25501                        .Case("{v2}", RISCV::V2)
25502                        .Case("{v3}", RISCV::V3)
25503                        .Case("{v4}", RISCV::V4)
25504                        .Case("{v5}", RISCV::V5)
25505                        .Case("{v6}", RISCV::V6)
25506                        .Case("{v7}", RISCV::V7)
25507                        .Case("{v8}", RISCV::V8)
25508                        .Case("{v9}", RISCV::V9)
25509                        .Case("{v10}", RISCV::V10)
25510                        .Case("{v11}", RISCV::V11)
25511                        .Case("{v12}", RISCV::V12)
25512                        .Case("{v13}", RISCV::V13)
25513                        .Case("{v14}", RISCV::V14)
25514                        .Case("{v15}", RISCV::V15)
25515                        .Case("{v16}", RISCV::V16)
25516                        .Case("{v17}", RISCV::V17)
25517                        .Case("{v18}", RISCV::V18)
25518                        .Case("{v19}", RISCV::V19)
25519                        .Case("{v20}", RISCV::V20)
25520                        .Case("{v21}", RISCV::V21)
25521                        .Case("{v22}", RISCV::V22)
25522                        .Case("{v23}", RISCV::V23)
25523                        .Case("{v24}", RISCV::V24)
25524                        .Case("{v25}", RISCV::V25)
25525                        .Case("{v26}", RISCV::V26)
25526                        .Case("{v27}", RISCV::V27)
25527                        .Case("{v28}", RISCV::V28)
25528                        .Case("{v29}", RISCV::V29)
25529                        .Case("{v30}", RISCV::V30)
25530                        .Case("{v31}", RISCV::V31)
25531                        .Default(RISCV::NoRegister);
25532    if (VReg != RISCV::NoRegister) {
25533      if (TRI->isTypeLegalForClass(RISCV::ZZZ_VMRegClass, VT.SimpleTy))
25534        return std::make_pair(VReg, &RISCV::ZZZ_VMRegClass);
25535      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
25536        return std::make_pair(VReg, &RISCV::VRRegClass);
// For LMUL>1 types, widen the named V register to the matching register
// group (e.g. {v8} + m2 type -> V8M2).
25537      for (const auto *RC :
25538           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
25539        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
25540          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
25541          return std::make_pair(VReg, RC);
25542        }
25543      }
25544    }
25545  }
25546
25547  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
25548}
25549
// Map RISC-V inline-asm memory constraint codes to InlineAsm constraint IDs.
// NOTE(review): the signature lines and the 'A' case's return statement
// (line 25556, presumably returning the 'A' constraint ID) are elided in
// this extraction.
25552  // Currently only support length 1 constraints.
25553  if (ConstraintCode.size() == 1) {
25554    switch (ConstraintCode[0]) {
25555    case 'A':
25557    default:
25558      break;
25559    }
25560  }
25561
25562  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
25563}
25564
// Validate and materialize an inline-asm immediate/symbol operand for the
// RISC-V-specific constraint letters; unsupported constraints fall through
// to the base class (the tail call at line 25602 is elided in this
// extraction, as are the first signature line and the 'S' case body).
25566    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
25567    SelectionDAG &DAG) const {
25568  // Currently only support length 1 constraints.
25569  if (Constraint.size() == 1) {
25570    switch (Constraint[0]) {
25571    case 'I':
25572      // Validate & create a 12-bit signed immediate operand.
25573      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
25574        uint64_t CVal = C->getSExtValue();
25575        if (isInt<12>(CVal))
25576          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
25577                                                    Subtarget.getXLenVT()));
25578      }
25579      return;
25580    case 'J':
25581      // Validate & create an integer zero operand.
25582      if (isNullConstant(Op))
25583        Ops.push_back(
25584            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
25585      return;
25586    case 'K':
25587      // Validate & create a 5-bit unsigned immediate operand.
25588      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
25589        uint64_t CVal = C->getZExtValue();
25590        if (isUInt<5>(CVal))
25591          Ops.push_back(
25592              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
25593      }
25594      return;
25595    case 'S':
25597      return;
25598    default:
25599      break;
25600    }
25601  }
25603}
25604
// Emit the fence required *before* an atomic operation for the given
// ordering. NOTE(review): the first signature line and two conditions
// (lines 25609 and 25614 — presumably the SequentiallyConsistent check
// under Ztso and the load+SeqCst check for the default path) are elided in
// this extraction; confirm against upstream before relying on this text.
25606                                                 Instruction *Inst,
25607                                                 AtomicOrdering Ord) const {
25608  if (Subtarget.hasStdExtZtso()) {
25610      return Builder.CreateFence(Ord);
25611    return nullptr;
25612  }
25613
25615    return Builder.CreateFence(Ord);
// Releasing stores need a leading release fence under the default (WMO)
// memory model.
25616  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
25617    return Builder.CreateFence(AtomicOrdering::Release);
25618  return nullptr;
25619}
25620
// Emit the fence required *after* an atomic operation for the given
// ordering. NOTE(review): the first signature line and the conditions at
// lines 25625/25633 are elided in this extraction.
25622                                                  Instruction *Inst,
25623                                                  AtomicOrdering Ord) const {
25624  if (Subtarget.hasStdExtZtso()) {
25626      return Builder.CreateFence(Ord);
25627    return nullptr;
25628  }
25629
// Acquiring loads need a trailing acquire fence under the default model.
25630  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
25631    return Builder.CreateFence(AtomicOrdering::Acquire);
// Optionally strengthen seq_cst stores with a trailing fence (tuning flag).
25632  if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
25634    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
25635  return nullptr;
25636}
25637
// Choose the IR-level expansion strategy for an atomicrmw instruction.
// NOTE(review): the signature and all return statements (the
// AtomicExpansionKind values) are elided in this extraction; only the
// decision conditions are visible.
25640  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
25641  // point operations can't be used in an lr/sc sequence without breaking the
25642  // forward-progress guarantee.
25643  if (AI->isFloatingPointOperation() ||
25649
25650  // Don't expand forced atomics, we want to have __sync libcalls instead.
25651  if (Subtarget.hasForcedAtomics())
25653
25654  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
// Nand has no AMO encoding; with Zacas (and Zabha for sub-word sizes) it can
// use a cmpxchg-arithmetic expansion, otherwise sub-word forms are masked.
25655  if (AI->getOperation() == AtomicRMWInst::Nand) {
25656    if (Subtarget.hasStdExtZacas() &&
25657        (Size >= 32 || Subtarget.hasStdExtZabha()))
25659    if (Size < 32)
25661  }
25662
// Sub-word (8/16-bit) RMWs need the masked-intrinsic expansion unless the
// subtarget has Zabha byte/halfword AMOs.
25663  if (Size < 32 && !Subtarget.hasStdExtZabha())
25665
25667}
25668
25669static Intrinsic::ID
// Map an AtomicRMW binary op to the matching riscv_masked_atomicrmw_*
// intrinsic used by the sub-word masked expansion.
// NOTE(review): the signature line and several 'case' labels (Xchg, Nand,
// UMax, UMin — inferred from the paired return values) are elided in this
// extraction.
25671  switch (BinOp) {
25672  default:
25673    llvm_unreachable("Unexpected AtomicRMW BinOp");
25675    return Intrinsic::riscv_masked_atomicrmw_xchg;
25676  case AtomicRMWInst::Add:
25677    return Intrinsic::riscv_masked_atomicrmw_add;
25678  case AtomicRMWInst::Sub:
25679    return Intrinsic::riscv_masked_atomicrmw_sub;
25681    return Intrinsic::riscv_masked_atomicrmw_nand;
25682  case AtomicRMWInst::Max:
25683    return Intrinsic::riscv_masked_atomicrmw_max;
25684  case AtomicRMWInst::Min:
25685    return Intrinsic::riscv_masked_atomicrmw_min;
25687    return Intrinsic::riscv_masked_atomicrmw_umax;
25689    return Intrinsic::riscv_masked_atomicrmw_umin;
25690  }
25691}
25692
// Emit the masked-atomicrmw intrinsic call for a sub-word atomicrmw.
// NOTE(review): the first signature line and the intrinsic-declaration
// lines (25716-25718) are elided in this extraction.
25694    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
25695    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
25696  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
25697  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
25698  // mask, as this produces better code than the LR/SC loop emitted by
25699  // int_riscv_masked_atomicrmw_xchg.
25700  if (AI->getOperation() == AtomicRMWInst::Xchg &&
25703    if (CVal->isZero())
25704      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
25705                                     Builder.CreateNot(Mask, "Inv_Mask"),
25706                                     AI->getAlign(), Ord);
25707    if (CVal->isMinusOne())
25708      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
25709                                     AI->getAlign(), Ord);
25710  }
25711
// The ordering is passed to the intrinsic as an XLen-wide integer constant.
25712  unsigned XLen = Subtarget.getXLen();
25713  Value *Ordering =
25714      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
25715  Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
25717      AI->getModule(),
25719
// The intrinsic operates on XLen-wide values; sign-extend the i32 operands
// on RV64 and truncate the result back afterwards.
25720  if (XLen == 64) {
25721    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
25722    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
25723    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
25724  }
25725
25726  Value *Result;
25727
25728  // Must pass the shift amount needed to sign extend the loaded value prior
25729  // to performing a signed comparison for min/max. ShiftAmt is the number of
25730  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
25731  // is the number of bits to left+right shift the value in order to
25732  // sign-extend.
25733  if (AI->getOperation() == AtomicRMWInst::Min ||
25735    const DataLayout &DL = AI->getDataLayout();
25736    unsigned ValWidth =
25737        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
25738    Value *SextShamt =
25739        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
25740    Result = Builder.CreateCall(LrwOpScwLoop,
25741                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
25742  } else {
25743    Result =
25744        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
25745  }
25746
25747  if (XLen == 64)
25748    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
25749  return Result;
25750}
25751
// Choose the IR-level expansion strategy for a cmpxchg instruction.
// NOTE(review): the first signature line, the Size computation (25759) and
// the return statements (25757/25762/25763) are elided in this extraction;
// sub-word cmpxchg presumably takes the masked-intrinsic path unless both
// Zabha and Zacas are available.
25754    const AtomicCmpXchgInst *CI) const {
25755  // Don't expand forced atomics, we want to have __sync libcalls instead.
25756  if (Subtarget.hasForcedAtomics())
25758
25760  if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
25761      (Size == 8 || Size == 16))
25764}
25765
// Emit the riscv_masked_cmpxchg intrinsic call for a sub-word cmpxchg.
// NOTE(review): the first signature line is elided in this extraction.
25767    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
25768    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
25769  unsigned XLen = Subtarget.getXLen();
25770  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
25771  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
// The intrinsic operates on XLen-wide values; sign-extend the i32 operands
// on RV64 and truncate the result back afterwards.
25772  if (XLen == 64) {
25773    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
25774    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
25775    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
25776  }
25777  Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
25778  Value *Result = Builder.CreateIntrinsic(
25779      CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
25780  if (XLen == 64)
25781    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
25782  return Result;
25783}
25784
// Return true if the zero-extend of a gather/scatter index can be removed
// (RVV indexed memory ops always zero-extend their indices).
// NOTE(review): the first signature line is elided in this extraction —
// presumably shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT).
25786                                               EVT DataVT) const {
25787  // We have indexed loads for all supported EEW types. Indices are always
25788  // zero extended.
25789  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
25790         isTypeLegal(Extend.getValueType()) &&
25791         isTypeLegal(Extend.getOperand(0).getValueType()) &&
25792         Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
25793}
25794
// Return true if a saturating FP-to-int conversion (Op) is profitable for
// the given FP source type: the integer operation must be legal/custom and
// the subtarget must have native support for the scalar FP type.
// NOTE(review): the first signature line is elided in this extraction —
// presumably shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT).
25796                                                EVT VT) const {
25797  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
25798    return false;
25799
25800  switch (FPVT.getSimpleVT().SimpleTy) {
25801  case MVT::f16:
25802    return Subtarget.hasStdExtZfhmin();
25803  case MVT::f32:
25804    return Subtarget.hasStdExtF();
25805  case MVT::f64:
25806    return Subtarget.hasStdExtD();
25807  default:
25808    return false;
25809  }
25810}
25811
// Select the jump-table entry encoding. NOTE(review): the signature and the
// return statements (lines 25816-25817 and 25819) are elided in this
// extraction — presumably returning a 4-byte custom encoding for RV64
// non-PIC small code model and the default encoding otherwise.
25813  // If we are using the small code model, we can reduce size of jump table
25814  // entry to 4 bytes.
25815  if (Subtarget.is64Bit() && !isPositionIndependent() &&
25818  }
25820}
25821
// Emit a custom (compact) jump-table entry: a direct reference to the
// target block's symbol. Only valid in the RV64, non-PIC, small-code-model
// configuration asserted below. NOTE(review): the first signature line and
// the assert's message string line (25826) are elided in this extraction.
25823    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
25824    unsigned uid, MCContext &Ctx) const {
25825  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
25827  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
25828}
25829
// Split an ADD/SUB address computation into (Base, Offset) for the
// XTHeadMemIdx indexed load/store forms. NOTE(review): the first signature
// line is elided in this extraction.
25831                                                     SDValue &Offset,
25833                                                     SelectionDAG &DAG) const {
25834  // Target does not support indexed loads.
25835  if (!Subtarget.hasVendorXTHeadMemIdx())
25836    return false;
25837
25838  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
25839    return false;
25840
25841  Base = Op->getOperand(0);
25842  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
25843    int64_t RHSC = RHS->getSExtValue();
25844    if (Op->getOpcode() == ISD::SUB)
25845      RHSC = -(uint64_t)RHSC;
25846
25847    // The constants that can be encoded in the THeadMemIdx instructions
25848    // are of the form (sign_extend(imm5) << imm2).
25849    bool isLegalIndexedOffset = false;
25850    for (unsigned i = 0; i < 4; i++)
25851      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
25852        isLegalIndexedOffset = true;
25853        break;
25854      }
25855
25856    if (!isLegalIndexedOffset)
25857      return false;
25858
25859    Offset = Op->getOperand(1);
25860    return true;
25861  }
25862
25863  return false;
25864}
25865
// TargetLowering hook: try to form a pre-incremented (PRE_INC) addressing
// mode for the load/store N. NOTE(review): the first signature line is
// elided in this extraction.
25867                                                    SDValue &Offset,
25869                                                    SelectionDAG &DAG) const {
25870  EVT VT;
25871  SDValue Ptr;
25872  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
25873    VT = LD->getMemoryVT();
25874    Ptr = LD->getBasePtr();
25875  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
25876    VT = ST->getMemoryVT();
25877    Ptr = ST->getBasePtr();
25878  } else
25879    return false;
25880
// The base pointer itself must be a (base, encodable-offset) computation.
25881  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
25882    return false;
25883
25884  AM = ISD::PRE_INC;
25885  return true;
25886}
25887
// TargetLowering hook: try to form a post-incremented (POST_INC) addressing
// mode for the load/store N, with 'Op' the address-update node.
// NOTE(review): the first signature line and the XCVmem load/store cast at
// line 25897 are elided in this extraction.
25889                                                     SDValue &Base,
25890                                                     SDValue &Offset,
25892                                                     SelectionDAG &DAG) const {
// XCVmem (CORE-V, RV32 only) supports register post-increment: accept an
// ADD whose one operand is the access's base pointer.
25893  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
25894    if (Op->getOpcode() != ISD::ADD)
25895      return false;
25896
25898      Base = LS->getBasePtr();
25899    else
25900      return false;
25901
25902    if (Base == Op->getOperand(0))
25903      Offset = Op->getOperand(1);
25904    else if (Base == Op->getOperand(1))
25905      Offset = Op->getOperand(0);
25906    else
25907      return false;
25908
25909    AM = ISD::POST_INC;
25910    return true;
25911  }
25912
25913  EVT VT;
25914  SDValue Ptr;
25915  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
25916    VT = LD->getMemoryVT();
25917    Ptr = LD->getBasePtr();
25918  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
25919    VT = ST->getMemoryVT();
25920    Ptr = ST->getBasePtr();
25921  } else
25922    return false;
25923
25924  if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
25925    return false;
25926  // Post-indexing updates the base, so it's not a valid transform
25927  // if that's not the same as the load's pointer.
25928  if (Ptr != Base)
25929    return false;
25930
25931  AM = ISD::POST_INC;
25932  return true;
25933}
25934
// Return true if fused multiply-add is faster than separate FMUL+FADD for
// the given (possibly vector) type, i.e. the subtarget has native FMA for
// the scalar element type. NOTE(review): the first signature line is
// elided in this extraction.
25936                                                       EVT VT) const {
25937  EVT SVT = VT.getScalarType();
25938
25939  if (!SVT.isSimple())
25940    return false;
25941
25942  switch (SVT.getSimpleVT().SimpleTy) {
25943  case MVT::f16:
25944    return VT.isVector() ? Subtarget.hasVInstructionsF16()
25945                         : Subtarget.hasStdExtZfhOrZhinx();
25946  case MVT::f32:
25947    return Subtarget.hasStdExtFOrZfinx();
25948  case MVT::f64:
25949    return Subtarget.hasStdExtDOrZdinx();
25950  default:
25951    break;
25952  }
25953
25954  return false;
25955}
25956
// Extension kind required for cmpxchg comparison arguments.
// NOTE(review): the signature line is elided in this extraction —
// presumably getExtendForAtomicCmpSwapArg().
25958  // Zacas will use amocas.w which does not require extension.
25959  return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
25960}
25961
// Extension kind required for an atomicrmw operand of the given operation.
// NOTE(review): the signature line and the 'case' labels at lines
// 25970-25973 (presumably the signed Min/Max and other comparing ops) are
// elided in this extraction.
25963  // Zaamo will use amo<op>.w which does not require extension.
25964  if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
25965    return ISD::ANY_EXTEND;
25966
25967  // Zalrsc pseudo expansions with comparison require sign-extension.
25968  assert(Subtarget.hasStdExtZalrsc());
25969  switch (Op) {
25974    return ISD::SIGN_EXTEND;
25975  default:
25976    break;
25977  }
25978  return ISD::ANY_EXTEND;
25980
// EH: the exception pointer is passed in a0 (X10).
// NOTE(review): the first signature line is elided in this extraction.
25982    const Constant *PersonalityFn) const {
25983  return RISCV::X10;
25984}
25985
// EH: the exception selector is passed in a1 (X11).
// NOTE(review): the first signature line is elided in this extraction.
25987    const Constant *PersonalityFn) const {
25988  return RISCV::X11;
25989}
25990
// Return whether a libcall argument/return of this type should be extended
// to XLen. NOTE(review): the signature line is elided in this extraction.
25992  // Return false to suppress the unnecessary extensions if the LibCall
25993  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
25994  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
25995                                  Type.getSizeInBits() < Subtarget.getXLen()))
25996    return false;
25997
25998  return true;
25999}
26000
// i32 libcall arguments on RV64 are always sign-extended per the ABI; other
// types keep the caller-requested signedness. NOTE(review): the first
// signature line is elided in this extraction.
26002                                                        bool IsSigned) const {
26003  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
26004    return true;
26005
26006  return IsSigned;
26007}
26008
// Return true if a MUL by constant C is better decomposed into shifts and
// adds/subs than kept as a multiply. NOTE(review): the first signature line
// is elided in this extraction.
26010                                                 SDValue C) const {
26011  // Check integral scalar types.
26012  if (!VT.isScalarInteger())
26013    return false;
26014
26015  // Omit the optimization if the sub target has the M extension and the data
26016  // size exceeds XLen.
26017  const bool HasZmmul = Subtarget.hasStdExtZmmul();
26018  if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
26019    return false;
26020
26021  auto *ConstNode = cast<ConstantSDNode>(C);
26022  const APInt &Imm = ConstNode->getAPIntValue();
26023
26024  // Don't do this if the Xqciac extension is enabled and the Imm in simm12.
26025  if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
26026    return false;
26027
26028  // Break the MUL to a SLLI and an ADD/SUB.
26029  if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
26030      (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
26031    return true;
26032
26033  // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
26034  if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
26035      ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
26036       (Imm - 8).isPowerOf2()))
26037    return true;
26038
26039  // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
26040  // a pair of LUI/ADDI.
26041  if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
26042      ConstNode->hasOneUse()) {
26043    APInt ImmS = Imm.ashr(Imm.countr_zero());
26044    if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
26045        (1 - ImmS).isPowerOf2())
26046      return true;
26047  }
26048
26049  return false;
26050}
26051
// Return whether folding (add x, c1) * c2 into x*c2 + c1*c2 is profitable.
// NOTE(review): the first signature line is elided in this extraction.
26053                                                      SDValue ConstNode) const {
26054  // Let the DAGCombiner decide for vectors.
26055  EVT VT = AddNode.getValueType();
26056  if (VT.isVector())
26057    return true;
26058
26059  // Let the DAGCombiner decide for larger types.
26060  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
26061    return true;
26062
26063  // It is worse if c1 is simm12 while c1*c2 is not.
26064  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
26065  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
26066  const APInt &C1 = C1Node->getAPIntValue();
26067  const APInt &C2 = C2Node->getAPIntValue();
26068  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
26069    return false;
26070
26071  // Default to true and let the DAGCombiner decide.
26072  return true;
26073}
26074
// NOTE(review): the function-name line was stripped in extraction; from the
// parameter tail this is the misaligned-memory-access legality hook
// (EVT / address-space / alignment variant). Confirm the exact signature
// against the original source.
26076 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
26077 unsigned *Fast) const {
// Scalar case: both legality and the optional "fast" result follow the
// enableUnalignedScalarMem() subtarget feature.
26078 if (!VT.isVector() || Subtarget.hasStdExtP()) {
26079 if (Fast)
26080 *Fast = Subtarget.enableUnalignedScalarMem();
26081 return Subtarget.enableUnalignedScalarMem();
26082 }
26083
26084 // All vector implementations must support element alignment
26085 EVT ElemVT = VT.getVectorElementType();
26086 if (Alignment >= ElemVT.getStoreSize()) {
26087 if (Fast)
26088 *Fast = 1;
26089 return true;
26090 }
26091
26092 // Note: We lower an unmasked unaligned vector access to an equally sized
26093 // e8 element type access. Given this, we effectively support all unmasked
26094 // misaligned accesses. TODO: Work through the codegen implications of
26095 // allowing such accesses to be formed, and considered fast.
26096 if (Fast)
26097 *Fast = Subtarget.enableUnalignedVectorMem();
26098 return Subtarget.enableUnalignedVectorMem();
26099}
26100
// NOTE(review): the function-name line was stripped in extraction; from the
// parameters and body this is the memcpy/memset type-selection hook that
// returns a preferred vector MVT (or MVT::Other to fall back to scalar
// expansion). Confirm the exact signature against the original source.
26102 LLVMContext &Context, const MemOp &Op,
26103 const AttributeList &FuncAttributes) const {
26104 if (!Subtarget.hasVInstructions())
26105 return MVT::Other;
26106
// Respect noimplicitfloat: vector registers must not be introduced.
26107 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
26108 return MVT::Other;
26109
26110 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
26111 // has an expansion threshold, and we want the number of hardware memory
26112 // operations to correspond roughly to that threshold. LMUL>1 operations
26113 // are typically expanded linearly internally, and thus correspond to more
26114 // than one actual memory operation. Note that store merging and load
26115 // combining will typically form larger LMUL operations from the LMUL1
26116 // operations emitted here, and that's okay because combining isn't
26117 // introducing new memory operations; it's just merging existing ones.
26118 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
26119 const unsigned MinVLenInBytes =
26120 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
26121
26122 if (Op.size() < MinVLenInBytes)
26123 // TODO: Figure out short memops. For the moment, do the default thing
26124 // which ends up using scalar sequences.
26125 return MVT::Other;
26126
26127 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
26128 // fixed vectors.
26129 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
26130 return MVT::Other;
26131
26132 // Prefer i8 for non-zero memset as it allows us to avoid materializing
26133 // a large scalar constant and instead use vmv.v.x/i to do the
26134 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
26135 // maximize the chance we can encode the size in the vsetvli.
26136 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
26137 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
26138
26139 // Do we have sufficient alignment for our preferred VT? If not, revert
26140 // to largest size allowed by our alignment criteria.
26141 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
26142 Align RequiredAlign(PreferredVT.getStoreSize());
26143 if (Op.isFixedDstAlign())
26144 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
26145 if (Op.isMemcpy())
26146 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
// Shrink the element type to the largest one the known alignment permits.
26147 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
26148 }
26149 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
26150}
26151
26153 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
26154 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
26155 bool IsABIRegCopy = CC.has_value();
26156 EVT ValueVT = Val.getValueType();
26157
26158 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
26159 if ((ValueVT == PairVT ||
26160 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
26161 ValueVT == MVT::f64)) &&
26162 NumParts == 1 && PartVT == MVT::Untyped) {
26163 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
26164 MVT XLenVT = Subtarget.getXLenVT();
26165 if (ValueVT == MVT::f64)
26166 Val = DAG.getBitcast(MVT::i64, Val);
26167 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
26168 // Always creating an MVT::Untyped part, so always use
26169 // RISCVISD::BuildGPRPair.
26170 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
26171 return true;
26172 }
26173
26174 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
26175 PartVT == MVT::f32) {
26176 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
26177 // nan, and cast to f32.
26178 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
26179 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
26180 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
26181 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
26182 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
26183 Parts[0] = Val;
26184 return true;
26185 }
26186
26187 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
26188#ifndef NDEBUG
26189 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
26190 [[maybe_unused]] unsigned ValLMUL =
26192 ValNF * RISCV::RVVBitsPerBlock);
26193 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
26194 [[maybe_unused]] unsigned PartLMUL =
26196 PartNF * RISCV::RVVBitsPerBlock);
26197 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
26198 "RISC-V vector tuple type only accepts same register class type "
26199 "TUPLE_INSERT");
26200#endif
26201
26202 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
26203 Val, DAG.getTargetConstant(0, DL, MVT::i32));
26204 Parts[0] = Val;
26205 return true;
26206 }
26207
26208 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
26209 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
26210 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
26211
26212 LLVMContext &Context = *DAG.getContext();
26213 EVT ValueEltVT = ValueVT.getVectorElementType();
26214 EVT PartEltVT = PartVT.getVectorElementType();
26215 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
26216 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
26217 if (PartVTBitSize % ValueVTBitSize == 0) {
26218 assert(PartVTBitSize >= ValueVTBitSize);
26219 // If the element types are different, bitcast to the same element type of
26220 // PartVT first.
26221 // Give an example here, we want copy a <vscale x 1 x i8> value to
26222 // <vscale x 4 x i16>.
26223 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
26224 // subvector, then we can bitcast to <vscale x 4 x i16>.
26225 if (ValueEltVT != PartEltVT) {
26226 if (PartVTBitSize > ValueVTBitSize) {
26227 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
26228 assert(Count != 0 && "The number of element should not be zero.");
26229 EVT SameEltTypeVT =
26230 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
26231 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
26232 }
26233 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
26234 } else {
26235 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
26236 }
26237 Parts[0] = Val;
26238 return true;
26239 }
26240 }
26241
26242 return false;
26243}
26244
26246 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
26247 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
26248 bool IsABIRegCopy = CC.has_value();
26249
26250 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
26251 if ((ValueVT == PairVT ||
26252 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
26253 ValueVT == MVT::f64)) &&
26254 NumParts == 1 && PartVT == MVT::Untyped) {
26255 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
26256 MVT XLenVT = Subtarget.getXLenVT();
26257
26258 SDValue Val = Parts[0];
26259 // Always starting with an MVT::Untyped part, so always use
26260 // RISCVISD::SplitGPRPair
26261 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
26262 Val);
26263 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
26264 Val.getValue(1));
26265 if (ValueVT == MVT::f64)
26266 Val = DAG.getBitcast(ValueVT, Val);
26267 return Val;
26268 }
26269
26270 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
26271 PartVT == MVT::f32) {
26272 SDValue Val = Parts[0];
26273
26274 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
26275 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
26276 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
26277 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
26278 return Val;
26279 }
26280
26281 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
26282 LLVMContext &Context = *DAG.getContext();
26283 SDValue Val = Parts[0];
26284 EVT ValueEltVT = ValueVT.getVectorElementType();
26285 EVT PartEltVT = PartVT.getVectorElementType();
26286
26287 unsigned ValueVTBitSize =
26289 .getSizeInBits()
26291
26292 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
26293 if (PartVTBitSize % ValueVTBitSize == 0) {
26294 assert(PartVTBitSize >= ValueVTBitSize);
26295 EVT SameEltTypeVT = ValueVT;
26296 // If the element types are different, convert it to the same element type
26297 // of PartVT.
26298 // Give an example here, we want copy a <vscale x 1 x i8> value from
26299 // <vscale x 4 x i16>.
26300 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
26301 // then we can extract <vscale x 1 x i8>.
26302 if (ValueEltVT != PartEltVT) {
26303 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
26304 assert(Count != 0 && "The number of element should not be zero.");
26305 SameEltTypeVT =
26306 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
26307 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
26308 }
26309 if (ValueVT.isFixedLengthVector())
26310 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
26311 else
26312 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
26313 return Val;
26314 }
26315 }
26316 return SDValue();
26317}
26318
26319bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
26320 // When aggressively optimizing for code size, we prefer to use a div
26321 // instruction, as it is usually smaller than the alternative sequence.
26322 // TODO: Add vector division?
26323 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
26324 return OptSize && !VT.isVector() &&
26326}
26327
26332
26334 // Scalarize zero_ext and sign_ext might stop match to widening instruction in
26335 // some situation.
26336 unsigned Opc = N->getOpcode();
26338 return false;
26339 return true;
26340}
26341
26342static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
26343 Module *M = IRB.GetInsertBlock()->getModule();
26344 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
26345 M, Intrinsic::thread_pointer, IRB.getPtrTy());
26346 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
26347 IRB.CreateCall(ThreadPointerFunc), Offset);
26348}
26349
26351 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const {
26352 // Fuchsia provides a fixed TLS slot for the stack cookie.
26353 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
26354 if (Subtarget.isTargetFuchsia())
26355 return useTpOffset(IRB, -0x10);
26356
26357 // Android provides a fixed TLS slot for the stack cookie. See the definition
26358 // of TLS_SLOT_STACK_GUARD in
26359 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
26360 if (Subtarget.isTargetAndroid())
26361 return useTpOffset(IRB, -0x18);
26362
26363 Module *M = IRB.GetInsertBlock()->getModule();
26364
26365 if (M->getStackProtectorGuard() == "tls") {
26366 // Users must specify the offset explicitly
26367 int Offset = M->getStackProtectorGuardOffset();
26368 return useTpOffset(IRB, Offset);
26369 }
26370
26371 return TargetLowering::getIRStackGuard(IRB, Libcalls);
26372}
26373
// NOTE(review): the function-name line was stripped in extraction; from the
// body this is an RVV load/store legality hook taking (DataType, Alignment),
// where fixed-length vectors additionally require
// useRVVForFixedLengthVectors. Confirm the exact name against the original.
26375 Align Alignment) const {
26376 if (!Subtarget.hasVInstructions())
26377 return false;
26378
26379 // Only support fixed vectors if we know the minimum vector size.
26380 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
26381 return false;
26382
26383 EVT ScalarType = DataType.getScalarType();
26384 if (!isLegalElementTypeForRVV(ScalarType))
26385 return false;
26386
// Without the unaligned-vector-mem feature, the access must be at least
// element aligned.
26387 if (!Subtarget.enableUnalignedVectorMem() &&
26388 Alignment < ScalarType.getStoreSize())
26389 return false;
26390
26391 return true;
26392}
26393
// NOTE(review): the function-name line was stripped in extraction; from the
// body this is a second RVV legality hook taking (DataType, Alignment) —
// identical to the hook above except it does not restrict fixed-length
// vectors. Confirm the exact name against the original source.
26395 Align Alignment) const {
26396 if (!Subtarget.hasVInstructions())
26397 return false;
26398
26399 EVT ScalarType = DataType.getScalarType();
26400 if (!isLegalElementTypeForRVV(ScalarType))
26401 return false;
26402
// Without the unaligned-vector-mem feature, require element alignment.
26403 if (!Subtarget.enableUnalignedVectorMem() &&
26404 Alignment < ScalarType.getStoreSize())
26405 return false;
26406
26407 return true;
26408}
26409
26413 const TargetInstrInfo *TII) const {
26414 assert(MBBI->isCall() && MBBI->getCFIType() &&
26415 "Invalid call instruction for a KCFI check");
26416 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
26417 MBBI->getOpcode()));
26418
26419 MachineOperand &Target = MBBI->getOperand(0);
26420 Target.setIsRenamable(false);
26421
26422 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
26423 .addReg(Target.getReg())
26424 .addImm(MBBI->getCFIType())
26425 .getInstr();
26426}
26427
26428#define GET_REGISTER_MATCHER
26429#include "RISCVGenAsmMatcher.inc"
26430
26433 const MachineFunction &MF) const {
26435 if (!Reg)
26437 if (!Reg)
26438 return Reg;
26439
26440 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
26441 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
26442 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
26443 StringRef(RegName) + "\"."));
26444 return Reg;
26445}
26446
26449 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
26450
26451 if (NontemporalInfo == nullptr)
26453
26454 // 1 for default value work as __RISCV_NTLH_ALL
26455 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
26456 // 3 -> __RISCV_NTLH_ALL_PRIVATE
26457 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
26458 // 5 -> __RISCV_NTLH_ALL
26459 int NontemporalLevel = 5;
26460 const MDNode *RISCVNontemporalInfo =
26461 I.getMetadata("riscv-nontemporal-domain");
26462 if (RISCVNontemporalInfo != nullptr)
26463 NontemporalLevel =
26465 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
26466 ->getValue())
26467 ->getZExtValue();
26468
26469 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
26470 "RISC-V target doesn't support this non-temporal domain.");
26471
26472 NontemporalLevel -= 2;
26474 if (NontemporalLevel & 0b1)
26475 Flags |= MONontemporalBit0;
26476 if (NontemporalLevel & 0b10)
26477 Flags |= MONontemporalBit1;
26478
26479 return Flags;
26480}
26481
26484
26485 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
26487 TargetFlags |= (NodeFlags & MONontemporalBit0);
26488 TargetFlags |= (NodeFlags & MONontemporalBit1);
26489 return TargetFlags;
26490}
26491
26493 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
26494 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
26495}
26496
26498 if (VT.isVector()) {
26499 EVT SVT = VT.getVectorElementType();
26500 // If the element type is legal we can use cpop.v if it is enabled.
26501 if (isLegalElementTypeForRVV(SVT))
26502 return Subtarget.hasStdExtZvbb();
26503 // Don't consider it fast if the type needs to be legalized or scalarized.
26504 return false;
26505 }
26506
26507 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
26508}
26509
26511 ISD::CondCode Cond) const {
26512 return isCtpopFast(VT) ? 0 : 1;
26513}
26514
26516 const Instruction *I) const {
26517 if (Subtarget.hasStdExtZalasr()) {
26518 if (Subtarget.hasStdExtZtso()) {
26519 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
26520 // should be lowered to plain load/store. The easiest way to do this is
26521 // to say we should insert fences for them, and the fence insertion code
26522 // will just not insert any fences
26523 auto *LI = dyn_cast<LoadInst>(I);
26524 auto *SI = dyn_cast<StoreInst>(I);
26525 if ((LI &&
26526 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
26527 (SI &&
26528 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
26529 // Here, this is a load or store which is seq_cst, and needs a .aq or
26530 // .rl therefore we shouldn't try to insert fences
26531 return false;
26532 }
26533 // Here, we are a TSO inst that isn't a seq_cst load/store
26534 return isa<LoadInst>(I) || isa<StoreInst>(I);
26535 }
26536 return false;
26537 }
26538 // Note that one specific case requires fence insertion for an
26539 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
26540 // than this hook due to limitations in the interface here.
26541 return isa<LoadInst>(I) || isa<StoreInst>(I);
26542}
26543
26545
26546 // GISel support is in progress or complete for these opcodes.
26547 unsigned Op = Inst.getOpcode();
26548 if (Op == Instruction::Add || Op == Instruction::Sub ||
26549 Op == Instruction::And || Op == Instruction::Or ||
26550 Op == Instruction::Xor || Op == Instruction::InsertElement ||
26551 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
26552 Op == Instruction::Freeze || Op == Instruction::Store)
26553 return false;
26554
26555 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
26556 // Mark RVV intrinsic as supported.
26557 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
26558 // GISel doesn't support tuple types yet. It also doesn't suport returning
26559 // a struct containing a scalable vector like vleff.
26560 if (Inst.getType()->isRISCVVectorTupleTy() ||
26561 Inst.getType()->isStructTy())
26562 return true;
26563
26564 for (unsigned i = 0; i < II->arg_size(); ++i)
26565 if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
26566 return true;
26567
26568 return false;
26569 }
26570 if (II->getIntrinsicID() == Intrinsic::vector_extract)
26571 return false;
26572 }
26573
26574 if (Inst.getType()->isScalableTy())
26575 return true;
26576
26577 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
26578 if (Inst.getOperand(i)->getType()->isScalableTy() &&
26579 !isa<ReturnInst>(&Inst))
26580 return true;
26581
26582 return false;
26583}
26584
26585SDValue
26586RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
26587 SelectionDAG &DAG,
26588 SmallVectorImpl<SDNode *> &Created) const {
26589 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
26590 if (isIntDivCheap(N->getValueType(0), Attr))
26591 return SDValue(N, 0); // Lower SDIV as SDIV
26592
26593 // Only perform this transform if short forward branch opt is supported.
26594 if (!Subtarget.hasShortForwardBranchIALU())
26595 return SDValue();
26596 EVT VT = N->getValueType(0);
26597 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
26598 return SDValue();
26599
26600 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
26601 if (Divisor.sgt(2048) || Divisor.slt(-2048))
26602 return SDValue();
26603 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
26604}
26605
26606bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
26607 EVT VT, const APInt &AndMask) const {
26608 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
26609 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
26611}
26612
26614 return Subtarget.getMinimumJumpTableEntries();
26615}
26616
26618 SDValue Value, SDValue Addr,
26619 int JTI,
26620 SelectionDAG &DAG) const {
26621 if (Subtarget.hasStdExtZicfilp()) {
26622 // When Zicfilp enabled, we need to use software guarded branch for jump
26623 // table branch.
26624 SDValue Chain = Value;
26625 // Jump table debug info is only needed if CodeView is enabled.
26627 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
26628 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
26629 }
26630 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
26631}
26632
26633// If an output pattern produces multiple instructions tablegen may pick an
26634// arbitrary type from an instructions destination register class to use for the
26635// VT of that MachineSDNode. This VT may be used to look up the representative
26636// register class. If the type isn't legal, the default implementation will
26637// not find a register class.
26638//
26639// Some integer types smaller than XLen are listed in the GPR register class to
26640// support isel patterns for GISel, but are not legal in SelectionDAG. The
26641// arbitrary type tablegen picks may be one of these smaller types.
26642//
26643// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
26644// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
26645std::pair<const TargetRegisterClass *, uint8_t>
26646RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
26647 MVT VT) const {
26648 switch (VT.SimpleTy) {
26649 default:
26650 break;
26651 case MVT::i8:
26652 case MVT::i16:
26653 case MVT::i32:
26655 case MVT::bf16:
26656 case MVT::f16:
26658 }
26659
26661}
26662
26664
26665#define GET_RISCVVIntrinsicsTable_IMPL
26666#include "RISCVGenSearchableTables.inc"
26667
26668} // namespace llvm::RISCVVIntrinsicsTable
26669
26671
26672 // If the function specifically requests inline stack probes, emit them.
26673 if (MF.getFunction().hasFnAttribute("probe-stack"))
26674 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
26675 "inline-asm";
26676
26677 return false;
26678}
26679
26681 Align StackAlign) const {
26682 // The default stack probe size is 4096 if the function has no
26683 // stack-probe-size attribute.
26684 const Function &Fn = MF.getFunction();
26685 unsigned StackProbeSize =
26686 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
26687 // Round down to the stack alignment.
26688 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
26689 return StackProbeSize ? StackProbeSize : StackAlign.value();
26690}
26691
26692SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
26693 SelectionDAG &DAG) const {
26695 if (!hasInlineStackProbe(MF))
26696 return SDValue();
26697
26698 MVT XLenVT = Subtarget.getXLenVT();
26699 // Get the inputs.
26700 SDValue Chain = Op.getOperand(0);
26701 SDValue Size = Op.getOperand(1);
26702
26704 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
26705 SDLoc dl(Op);
26706 EVT VT = Op.getValueType();
26707
26708 // Construct the new SP value in a GPR.
26709 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
26710 Chain = SP.getValue(1);
26711 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
26712 if (Align)
26713 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
26714 DAG.getSignedConstant(-Align->value(), dl, VT));
26715
26716 // Set the real SP to the new value with a probing loop.
26717 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
26718 return DAG.getMergeValues({SP, Chain}, dl);
26719}
26720
26723 MachineBasicBlock *MBB) const {
26724 MachineFunction &MF = *MBB->getParent();
26725 MachineBasicBlock::iterator MBBI = MI.getIterator();
26726 DebugLoc DL = MBB->findDebugLoc(MBBI);
26727 Register TargetReg = MI.getOperand(0).getReg();
26728
26729 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
26730 bool IsRV64 = Subtarget.is64Bit();
26731 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
26732 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
26733 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
26734
26735 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
26736 MachineBasicBlock *LoopTestMBB =
26737 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
26738 MF.insert(MBBInsertPoint, LoopTestMBB);
26739 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
26740 MF.insert(MBBInsertPoint, ExitMBB);
26741 Register SPReg = RISCV::X2;
26742 Register ScratchReg =
26743 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
26744
26745 // ScratchReg = ProbeSize
26746 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
26747
26748 // LoopTest:
26749 // SUB SP, SP, ProbeSize
26750 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
26751 .addReg(SPReg)
26752 .addReg(ScratchReg);
26753
26754 // s[d|w] zero, 0(sp)
26755 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
26756 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
26757 .addReg(RISCV::X0)
26758 .addReg(SPReg)
26759 .addImm(0);
26760
26761 // BLT TargetReg, SP, LoopTest
26762 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
26763 .addReg(TargetReg)
26764 .addReg(SPReg)
26765 .addMBB(LoopTestMBB);
26766
26767 // Adjust with: MV SP, TargetReg.
26768 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
26769 .addReg(TargetReg)
26770 .addImm(0);
26771
26772 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
26774
26775 LoopTestMBB->addSuccessor(ExitMBB);
26776 LoopTestMBB->addSuccessor(LoopTestMBB);
26777 MBB->addSuccessor(LoopTestMBB);
26778
26779 MI.eraseFromParent();
26780 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
26781 return ExitMBB->begin()->getParent();
26782}
26783
26785 if (Subtarget.hasStdExtFOrZfinx()) {
26786 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
26787 return RCRegs;
26788 }
26789 return {};
26790}
26791
26793 EVT VT = Y.getValueType();
26794
26795 if (VT.isVector())
26796 return false;
26797
26798 return VT.getSizeInBits() <= Subtarget.getXLen();
26799}
26800
26802 SDValue N1) const {
26803 if (!N0.hasOneUse())
26804 return false;
26805
26806 // Avoid reassociating expressions that can be lowered to vector
26807 // multiply accumulate (i.e. add (mul x, y), z)
26808 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::MUL &&
26809 (N0.getValueType().isVector() && Subtarget.hasVInstructions()))
26810 return false;
26811
26812 return true;
26813}
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static constexpr unsigned long long mask(BlockVerifier::State S)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI)
Definition CostModel.cpp:73
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue combineVdota4Accum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(3))
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue compressShuffleOfShuffles(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static const unsigned ZvfbfaVPOps[]
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const unsigned ZvfbfaOps[]
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX, unsigned ShY, bool AddX, unsigned Shift)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static SDValue performVWABDACombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue foldReduceOperandViaVDOTA4(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt, unsigned Shift)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses, bool IsCopyable=false)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1655
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1747
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & set()
Definition BitVector.h:370
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:194
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This class represents a range of values.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:776
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
Helper struct to store a base, index and offset that forms an address.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1957
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2487
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:604
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:551
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Tracks which library functions to use for a particular subtarget.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:358
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
bool isLittleEndian() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override
Returns how the platform's atomic rmw operations expect their input argument to be extended (ZERO_EXT...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const
Return true if a fault-only-first load of the given result type and alignment is legal.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Customize the preferred legalization strategy for certain types.
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:808
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI bool isRISCVVectorTupleTy() const
Definition Type.cpp:146
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SET_FPENV
Sets the current floating-point environment.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ RESET_FPENV
Set floating-point environment to default state.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propatate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:635
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ CTLS
Count leading redundant sign bits.
Definition ISDOpcodes.h:792
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ STRICT_FP_TO_FP16
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
@ STRICT_FP16_TO_FP
@ GET_FPENV
Gets the current floating-point environment.
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:640
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:681
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Addres, End Address Outputs: Output Chain
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:624
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:871
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:875
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
const PseudoInfo * getBaseInfo(unsigned BaseInstr, uint8_t VLMul, uint8_t SEW, bool IsAltFmt=false)
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Opcode_match m_Opc(unsigned Opcode)
auto m_ExactSr(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
TernaryOpc_match< LHS, RHS, IDX > m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1590
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
int isShifted359(T Value, int &Shift)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:410
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:372
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
bool isRISCVVectorTuple() const
Return true if this is a vector value type.
Definition ValueTypes.h:187
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:427
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:290
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:167
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:178
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:312
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:327
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:186
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:363
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:296
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...