doxygen/AArch64PostLegalizerLowering_8cpp_source.html

//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// Post-legalization lowering for instructions.

///

/// This is used to offload pattern matching from the selector.

///

/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually

/// a G_ZIP, G_UZP, etc.

///

/// General optimization combines should be handled by either the

/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.

///

//===----------------------------------------------------------------------===//


#include "AArch64.h"

#include "AArch64ExpandImm.h"

#include "AArch64GlobalISelUtils.h"

#include "AArch64PerfectShuffle.h"

#include "AArch64Subtarget.h"

#include "GISel/AArch64LegalizerInfo.h"

#include "MCTargetDesc/AArch64MCTargetDesc.h"

#include "Utils/AArch64BaseInfo.h"

#include "llvm/CodeGen/GlobalISel/Combiner.h"

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Utils.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachinePassManager.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/Support/ErrorHandling.h"

#include <optional>


#define GET_GICOMBINER_DEPS

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_DEPS


#define DEBUG_TYPE "aarch64-postlegalizer-lowering"


using namespace llvm;

using namespace MIPatternMatch;

using namespace AArch64GISelUtils;


#define GET_GICOMBINER_TYPES

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_TYPES


namespace {


/// Describes the pseudo instruction that replaces a G_SHUFFLE_VECTOR.
///
/// The shuffle matchers in this file fill one of these in, and the matching
/// apply function emits the instruction it describes.
struct ShuffleVectorPseudo {
  /// Opcode for the instruction (e.g. G_ZIP1). Zero-initialized so that a
  /// default-constructed object never carries an indeterminate opcode.
  unsigned Opc = 0;
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source operands.

  /// Build a fully specified pseudo from its opcode, destination and sources.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}

  /// Build an empty pseudo, to be assigned by a matcher later.
  ShuffleVectorPseudo() = default;
};


/// Decide whether a G_EXT can implement the shuffle mask \p M when the two
/// vector sources of the shuffle are different.
///
/// \param M       The shuffle mask; negative entries are undef.
/// \param NumElts Element count used to size the index arithmetic and to
///                split indices between the two sources.
/// \returns std::nullopt when \p M has no defined entry or when its defined
/// entries are not successive. Otherwise a pair whose first member is true
/// when the two sources must be swapped, and whose second member is the G_EXT
/// start index (in elements).
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Find the first defined (non-undef) mask entry.
  auto FirstDefined = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstDefined == M.end())
    return std::nullopt;

  // The expected index lives in an APInt of logBase2(2 * NumElts) bits so
  // that incrementing it wraps on overflow instead of running past the end.
  unsigned IndexBits = APInt(32, NumElts * 2).logBase2();
  APInt Expected = APInt(IndexBits, *FirstDefined + 1, false, true);

  // Every later defined entry must be the successor of the previous one. The
  // expectation advances at every position, including undef positions.
  for (auto It = std::next(FirstDefined), End = M.end(); It != End; ++It) {
    const APInt Want = Expected++;
    const int Elt = *It;
    if (Elt >= 0 && Elt != Want)
      return std::nullopt;
  }

  // Expected is now the last mask index plus 1, which is also the value the
  // first element would have had if leading undefs were filled in. E.g.
  //   <-1, -1, 3, ...>    is treated as <1, 2, 3, ...>.
  //   <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  const uint64_t Imm = Expected.getZExtValue();

  // Two different cases require swapping the input vectors. For <4 x i32>:
  //   Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  //   Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // Both end up using mask <5, 6, 7, 0>, which needs the inputs reversed.
  if (Imm < NumElts)
    return std::make_pair(true, Imm);
  return std::make_pair(false, Imm - NumElts);
}


/// Helper for matchINS: recognise a mask that copies one source unchanged
/// except for a single lane.
///
/// \param M                The shuffle mask; -1 entries match either source.
/// \param NumInputElements Number of elements in each shuffle input.
/// \returns std::nullopt when \p M is not such a mask. Otherwise a pair:
///   - first:  true when the G_INSERT_VECTOR_ELT destination should be the
///             LHS of the G_SHUFFLE_VECTOR, false when it should be the RHS.
///   - second: the destination lane for the G_INSERT_VECTOR_ELT.
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;

  int LHSHits = 0, RHSHits = 0;
  int LHSMissLane = -1, RHSMissLane = -1;
  for (int Lane = 0; Lane < NumInputElements; ++Lane) {
    const int Elt = M[Lane];

    // An undef lane is compatible with both sources.
    if (Elt == -1) {
      ++LHSHits;
      ++RHSHits;
      continue;
    }

    // Identity lane of the LHS?
    if (Elt == Lane)
      ++LHSHits;
    else
      LHSMissLane = Lane;

    // Identity lane of the RHS?
    if (Elt == Lane + NumInputElements)
      ++RHSHits;
    else
      RHSMissLane = Lane;
  }

  // Exactly one lane may differ from the chosen source. LHS is preferred.
  const int Needed = NumInputElements - 1;
  if (LHSHits == Needed)
    return std::make_pair(true, LHSMissLane);
  if (RHSHits == Needed)
    return std::make_pair(false, RHSMissLane);
  return std::nullopt;
}


/// Try to replace a G_SHUFFLE_VECTOR with a lane-reversal instruction.
///
/// \param MI             The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, G_REV64, G_REV32 or G_BSWAP applied to
///                        the first shuffle source.
/// \return true if \p MI can be replaced; lane sizes are tried in the order
/// 64, 32, 16.
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const LLT DstTy = MRI.getType(DstReg);

  // Element size for a rev cannot be 64.
  const unsigned EltBits = DstTy.getScalarSizeInBits();
  if (EltBits == 64)
    return false;

  const unsigned NumElts = DstTy.getNumElements();
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (!isREVMask(Mask, EltBits, NumElts, LaneSize))
      continue;

    // Pick the opcode that reverses within lanes of this size.
    unsigned Opcode = AArch64::G_BSWAP;
    switch (LaneSize) {
    case 64U:
      Opcode = AArch64::G_REV64;
      break;
    case 32U:
      Opcode = AArch64::G_REV32;
      break;
    default:
      break;
    }

    MatchInfo = ShuffleVectorPseudo(Opcode, DstReg, {SrcReg});
    return true;
  }

  return false;
}


/// Try to replace a G_SHUFFLE_VECTOR with G_TRN1 or G_TRN2.
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, the G_TRN1/G_TRN2 pseudo, with the two
///                        sources in the order reported by isTRNMask.
/// \return true if isTRNMask accepts the shuffle mask.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register Dst = MI.getOperand(0).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();

  unsigned WhichResult;
  unsigned OperandOrder;
  if (!isTRNMask(Mask, NumElts, WhichResult, OperandOrder))
    return false;

  // A non-zero operand order means the two shuffle inputs are swapped.
  const bool Swapped = OperandOrder != 0;
  Register V1 = MI.getOperand(Swapped ? 2 : 1).getReg();
  Register V2 = MI.getOperand(Swapped ? 1 : 2).getReg();

  unsigned Opc = AArch64::G_TRN2;
  if (WhichResult == 0)
    Opc = AArch64::G_TRN1;
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}


/// Try to replace a G_SHUFFLE_VECTOR with G_UZP1 or G_UZP2.
///
/// \param [in] MI         The shuffle vector instruction.
/// \param [out] MatchInfo On success, a G_UZP1 or G_UZP2 pseudo taking the two
///                        shuffle sources in their original order.
/// \return true if isUZPMask accepts the shuffle mask.
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register Dst = MI.getOperand(0).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();

  unsigned WhichResult;
  if (!isUZPMask(Mask, NumElts, WhichResult))
    return false;

  unsigned Opc = AArch64::G_UZP2;
  if (WhichResult == 0)
    Opc = AArch64::G_UZP1;

  const Register LHS = MI.getOperand(1).getReg();
  const Register RHS = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {LHS, RHS});
  return true;
}


/// Try to replace a G_SHUFFLE_VECTOR with G_ZIP1 or G_ZIP2.
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, the G_ZIP1/G_ZIP2 pseudo, with the two
///                        sources in the order reported by isZIPMask.
/// \return true if isZIPMask accepts the shuffle mask.
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register Dst = MI.getOperand(0).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const unsigned NumElts = MRI.getType(Dst).getNumElements();

  unsigned WhichResult;
  unsigned OperandOrder;
  if (!isZIPMask(Mask, NumElts, WhichResult, OperandOrder))
    return false;

  // A non-zero operand order means the two shuffle inputs are swapped.
  const bool Swapped = OperandOrder != 0;
  Register V1 = MI.getOperand(Swapped ? 2 : 1).getReg();
  Register V2 = MI.getOperand(Swapped ? 1 : 2).getReg();

  unsigned Opc = AArch64::G_ZIP2;
  if (WhichResult == 0)
    Opc = AArch64::G_ZIP1;
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}


/// Helper function for matchDup.

bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,

                                 MachineRegisterInfo &MRI,

                                 ShuffleVectorPseudo &MatchInfo) {

  if (Lane != 0)

    return false;


  // Try to match a vector splat operation into a dup instruction.

  // We're looking for this pattern:

  //

  // %scalar:gpr(s64) = COPY $x0

  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF

  // %cst0:gpr(s32) = G_CONSTANT i32 0

  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)

  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)

  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,

  // %zerovec(<2 x s32>)

  //

  // ...into:

  // %splat = G_DUP %scalar


  // Begin matching the insert.

  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,

                             MI.getOperand(1).getReg(), MRI);

  if (!InsMI)

    return false;

  // Match the undef vector operand.

  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),

                    MRI))

    return false;


  // Match the index constant 0.

  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))

    return false;


  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),

                                  {InsMI->getOperand(2).getReg()});

  return true;

}


/// Helper function for matchDup.

bool matchDupFromBuildVector(int Lane, MachineInstr &MI,

                             MachineRegisterInfo &MRI,

                             ShuffleVectorPseudo &MatchInfo) {

  assert(Lane >= 0 && "Expected positive lane?");

  int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();

  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the

  // lane's definition directly.

  auto *BuildVecMI =

      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,

                   MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);

  // If Lane >= NumElements then it is point to RHS, just check from RHS

  if (NumElements <= Lane)

    Lane -= NumElements;


  if (!BuildVecMI)

    return false;

  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();

  MatchInfo =

      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});

  return true;

}


/// Try to replace a splat G_SHUFFLE_VECTOR with a G_DUP.
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, the G_DUP pseudo.
/// \return true if \p MI is a splat (per getSplatIndex) and one of the
/// insert-vector-elt or build-vector helpers matched.
bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto SplatIdx = getSplatIndex(MI);
  if (!SplatIdx)
    return false;

  // If this is undef splat, generate it via "just" vdup, if possible.
  const int Lane = *SplatIdx < 0 ? 0 : *SplatIdx;

  // The insert-vector-elt form is tried first; the build-vector form is only
  // consulted when it fails.
  return matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo) ||
         matchDupFromBuildVector(Lane, MI, MRI, MatchInfo);
}


// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
//
// The first mask entry must be defined; it is the EXT start index. Every
// later defined entry must equal the running successor of that index, where
// the successor wraps back to 0 when it reaches the element count of Ty.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  const unsigned NumElts = Ty.getNumElements();

  // An undef first index is not supported.
  const int First = M[0];
  if (First < 0)
    return false;

  unsigned Expected = First;
  for (unsigned Idx = 1; Idx < NumElts; ++Idx) {
    // Step to the next expected index, wrapping around to zero.
    Expected = (Expected + 1 == NumElts) ? 0 : Expected + 1;

    // UNDEF indices match anything.
    const int Elt = M[Idx];
    if (Elt >= 0 && static_cast<unsigned>(Elt) != Expected)
      return false;
  }

  return true;
}


/// Try to replace a G_SHUFFLE_VECTOR with a G_EXT.
///
/// Two shapes are recognised: a two-source mask accepted by getExtMask, and a
/// single-source rotation (second source is G_IMPLICIT_DEF) accepted by
/// isSingletonExtMask.
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, a G_EXT pseudo whose third source
///                        operand is the immediate: the start element index
///                        multiplied by the element size in bytes.
bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register Dst = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  // Scale factor from an element index to a byte offset.
  const uint64_t BytesPerElt = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (ExtInfo) {
    // Two-source form; the sources may need to be swapped.
    const bool ReverseExt = ExtInfo->first;
    if (ReverseExt)
      std::swap(V1, V2);
    const uint64_t ByteImm = ExtInfo->second * BytesPerElt;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, ByteImm});
    return true;
  }

  // Single-source form: the second source must be undef and the mask must be
  // a rotation of the first source.
  if (!getOpcodeDef<GImplicitDef>(V2, MRI) || !isSingletonExtMask(Mask, DstTy))
    return false;

  const uint64_t ByteImm = Mask[0] * BytesPerElt;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, ByteImm});
  return true;
}


/// Replace a G_SHUFFLE_VECTOR instruction with the pseudo in \p MatchInfo.
///
/// G_BSWAP is special-cased: the single source is bitcast to a vector of
/// 16-bit elements, byte-swapped in that type, and bitcast back to the
/// destination. Every other opcode is emitted directly with the recorded
/// destination and sources. \p MI is erased in both cases.
void applyShuffleVectorPseudo(MachineInstr &MI, MachineRegisterInfo &MRI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);

  if (MatchInfo.Opc != TargetOpcode::G_BSWAP) {
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
    MI.eraseFromParent();
    return;
  }

  assert(MatchInfo.SrcOps.size() == 1);
  const LLT V8S8 = LLT::fixed_vector(8, 8);
  const LLT DstTy = MRI.getType(MatchInfo.Dst);
  assert(DstTy == V8S8 || DstTy == LLT::fixed_vector(16, 8));

  // Same total width as the destination, but with 16-bit elements.
  const LLT SwapTy = DstTy == V8S8 ? LLT::fixed_vector(4, LLT::integer(16))
                                   : LLT::fixed_vector(8, LLT::integer(16));
  // FIXME: NVCAST
  auto ToWide = MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {SwapTy},
                                      MatchInfo.SrcOps[0]);
  auto Swapped = MIRBuilder.buildInstr(MatchInfo.Opc, {SwapTy}, {ToWide});
  MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {MatchInfo.Dst}, {Swapped});
  MI.eraseFromParent();
}


/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
///
/// Handled separately from the other pseudos because the immediate recorded
/// as the third source operand must be emitted as a G_CONSTANT for the
/// imported tablegen patterns to work. An immediate of 0 degenerates to a
/// plain copy of the first source. \p MI is erased.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  const auto Imm = MatchInfo.SrcOps[2].getImm();

  if (Imm != 0) {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto ImmCst = MIRBuilder.buildConstant(LLT::integer(32), Imm);
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], ImmCst});
  } else {
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  }

  MI.eraseFromParent();
}


/// Replace \p MI with a G_REV64 of its first source followed by a G_EXT that
/// takes the reversed value as both inputs with a constant 8 as the final
/// operand. The destination must be a 128-bit type. \p MI is erased.
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");

  MachineIRBuilder MIRBuilder(MI);
  auto ExtImm = MIRBuilder.buildConstant(LLT::integer(32), 8);
  auto Reversed = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {SrcReg});
  MIRBuilder.buildInstr(AArch64::G_EXT, {DstReg},
                        {Reversed, Reversed, ExtImm});
  MI.eraseFromParent();
}


/// Match a G_INSERT_VECTOR_ELT whose index operand is not a known integer
/// constant (after looking through to its definition).
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  const Register IdxReg = MI.getOperand(3).getReg();
  auto ConstIdx = getIConstantVRegValWithLookThrough(IdxReg, MRI);
  return !ConstIdx.has_value();
}


/// Lower a G_INSERT_VECTOR_ELT through a stack temporary: store the input
/// vector to a new stack slot, store the element at the slot address plus
/// (index & (NumElts - 1)) * element-size, then reload the whole vector into
/// the original destination and erase \p MI.
///
/// Scalable vectors are left untouched (the function returns without
/// emitting or erasing anything).
///
/// NOTE(review): the stack object is created with an alignment of
/// min(vector size in bytes, 16), while the whole-vector store and reload are
/// annotated with Align(8) — confirm this is intended.
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  const Register IdxReg = Insert.getIndexReg();
  const LLT VecTy = MRI.getType(Insert.getReg(0));
  const LLT EltTy = MRI.getType(Insert.getElementReg());
  const LLT IdxTy = MRI.getType(IdxReg);

  if (VecTy.isScalableVector())
    return;

  // Create a stack slot large enough for the vector.
  MachineFunction &MF = Builder.getMF();
  Align SlotAlign(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  const int FrameIdx = MF.getFrameInfo().CreateStackObject(
      VecTy.getSizeInBytes(), SlotAlign, false);
  const LLT SlotPtrTy = LLT::pointer(0, 64);
  MachinePointerInfo SlotInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto SlotAddr = Builder.buildFrameIndex(SlotPtrTy, FrameIdx);

  // Spill the incoming vector into the slot.
  Builder.buildStore(Insert.getOperand(1), SlotAddr, SlotInfo, Align(8));

  // Compute the address of the element. The index is masked so that an
  // out-of-bounds index cannot address memory outside the slot.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto LaneMask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register ClampedIdx = Builder.buildAnd(IdxTy, IdxReg, LaneMask).getReg(0);
  auto EltBytes = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register ByteOffset =
      Builder.buildMul(IdxTy, ClampedIdx, EltBytes).getReg(0);
  Register EltAddr =
      Builder
          .buildPtrAdd(MRI.getType(SlotAddr.getReg(0)), SlotAddr, ByteOffset)
          .getReg(0);

  // Overwrite the selected element in memory.
  Builder.buildStore(Insert.getElementReg(), EltAddr, SlotInfo, Align(1));
  // Reload the whole vector into the original destination.
  Builder.buildLoad(Insert.getReg(0), SlotAddr, SlotInfo, Align(8));
  Insert.eraseFromParent();
}


/// Match a G_SHUFFLE_VECTOR whose mask corresponds to a
/// G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success: (vector to insert into, destination
///                        lane, vector to extract from, source lane).
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const int NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();

  auto InsInfo = isINSMask(Mask, NumElts);
  if (!InsInfo)
    return false;
  const bool DstIsLeft = InsInfo->first;
  const int DstLane = InsInfo->second;

  const Register Left = MI.getOperand(1).getReg();
  const Register Right = MI.getOperand(2).getReg();

  // The mask entry at the odd lane names the element to insert. Entries at or
  // above NumElts refer to the right-hand source.
  const int MaskElt = Mask[DstLane];
  const bool SrcIsRight = MaskElt >= NumElts;
  const Register SrcVec = SrcIsRight ? Right : Left;
  const int SrcLane = SrcIsRight ? MaskElt - NumElts : MaskElt;

  MatchInfo =
      std::make_tuple(DstIsLeft ? Left : Right, DstLane, SrcVec, SrcLane);
  return true;
}


/// Replace a G_SHUFFLE_VECTOR matched by matchINS with an extract of the
/// source lane followed by an insert into the destination lane.
///
/// \param MatchInfo (vector to insert into, destination lane, vector to
///                  extract from, source lane).
void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  const Register Dst = MI.getOperand(0).getReg();
  const auto EltTy = MRI.getType(Dst).getElementType();

  const Register DstVec = std::get<0>(MatchInfo);
  const int DstLane = std::get<1>(MatchInfo);
  const Register SrcVec = std::get<2>(MatchInfo);
  const int SrcLane = std::get<3>(MatchInfo);

  // Pull the element out of the source vector...
  auto SrcIdx = Builder.buildConstant(LLT::integer(64), SrcLane);
  auto Elt = Builder.buildExtractVectorElement(EltTy, SrcVec, SrcIdx);
  // ...and place it into the destination lane.
  auto DstIdx = Builder.buildConstant(LLT::integer(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Elt, DstIdx);
  MI.eraseFromParent();
}


/// isVShiftRImm - Check whether \p Reg is a splat that is a valid immediate
/// operand for a vector shift right. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift.
///
/// \param Reg       The shift-amount register.
/// \param Ty        The (vector) type being shifted.
/// \param [out] Cnt Set to the splat value whenever one is found, including
///                  when it is out of range.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *Def = MRI.getVRegDef(Reg);
  auto Splat = getAArch64VectorSplatScalar(*Def, MRI);
  if (!Splat)
    return false;

  Cnt = *Splat;
  const int64_t EltBits = Ty.getScalarSizeInBits();
  return !(Cnt < 1 || Cnt > EltBits);
}


/// Match a vector G_ASHR or G_LSHR whose shift amount is a valid immediate.
///
/// \param [out] Imm The shift amount, as produced by isVShiftRImm.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  const LLT ShiftedTy = MRI.getType(MI.getOperand(1).getReg());
  // Scalar shifts are not handled here.
  return ShiftedTy.isVector() &&
         isVShiftRImm(MI.getOperand(2).getReg(), MRI, ShiftedTy, Imm);
}


/// Replace a vector G_ASHR / G_LSHR with G_VASHR / G_VLSHR respectively,
/// carrying the shift amount \p Imm as an immediate operand. \p MI is erased.
void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  const unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);

  unsigned NewOpc = AArch64::G_VLSHR;
  if (Opc == TargetOpcode::G_ASHR)
    NewOpc = AArch64::G_VASHR;

  MachineIRBuilder MIB(MI);
  auto Shift = MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)});
  Shift.addImm(Imm);
  MI.eraseFromParent();
}


/// \returns true if the absolute value of \p C is a legal arithmetic
/// immediate. Negative immediates qualify too, since the compare can be
/// written as an ADDS instruction with the negated immediate.
bool isLegalCmpImmed(const APInt &C) {
  const uint64_t Magnitude = C.abs().getZExtValue();
  return isLegalArithImmed(Magnitude);
}


/// Decide whether an integer G_ICMP against 1 or -1 can be rewritten to
/// compare against 0 by adjusting its predicate.
///
/// Supported rewrites (signed predicates only):
///   (and X, Y) slt  1  =>  (and X, Y) sle 0
///   (and X, Y) sge  1  =>  (and X, Y) sgt 0
///        X     sle -1  =>        X     slt 0
///        X     sgt -1  =>        X     sge 0
///
/// The compare-against-1 rewrites apply only when \p LHS is defined by a
/// G_AND. The compare-against--1 rewrites apply when \p LHS is defined by a
/// G_AND, or otherwise when \p LHS has exactly one non-debug use.
///
/// \param LHS The compare LHS register.
/// \param C   The constant RHS (only 1 and all-ones are considered).
/// \param P   In/out predicate; updated when a rewrite applies.
/// \param MRI Used to inspect the LHS definition and use count.
/// \returns true if \p P was updated and comparing against 0 is equivalent.
static bool shouldBeAdjustedToZero(Register LHS, const APInt &C,
                                   CmpInst::Predicate &P,
                                   const MachineRegisterInfo &MRI) {
  const bool LHSIsAnd = getOpcodeDef<GAnd>(LHS, MRI) != nullptr;

  // Compare against 1, AND on the left-hand side.
  if (C.isOne() && LHSIsAnd) {
    if (P == CmpInst::ICMP_SLT) {
      P = CmpInst::ICMP_SLE;
      return true;
    }
    if (P == CmpInst::ICMP_SGE) {
      P = CmpInst::ICMP_SGT;
      return true;
    }
  }

  // A non-AND LHS must have a single non-debug use.
  if (!(LHSIsAnd || MRI.hasOneNonDBGUse(LHS)))
    return false;

  // Compare against -1.
  if (!C.isAllOnes())
    return false;
  if (P == CmpInst::ICMP_SLE) {
    P = CmpInst::ICMP_SLT;
    return true;
  }
  if (P == CmpInst::ICMP_SGT) {
    P = CmpInst::ICMP_SGE;
    return true;
  }
  return false;
}


/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic
/// immediate, or at least cheaper to materialize.
///
/// \param LHS The compare LHS register (used for the compare-with-zero
///            rewrite).
/// \param RHS The compare RHS register; must be a scalar constant for any
///            rewrite to apply.
/// \param P   The compare predicate.
/// \param MRI Used to look up types and constant definitions.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction, or std::nullopt when the compare
/// should be left alone (vector type, non-constant RHS, RHS already legal,
/// unsupported predicate, or adjusted constant neither legal nor cheaper).
///
/// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register LHS, Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  APInt C = ValAndVReg->Value;

  // Comparing against 0 is preferred over comparing against 1 / -1 when the
  // predicate can absorb the difference.
  if (shouldBeAdjustedToZero(LHS, C, P, MRI))
    return {{0, P}};

  if (isLegalCmpImmed(C))
    return std::nullopt;

  const uint64_t OriginalC = C.getZExtValue();

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if (C.isMinSignedValue())
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C = C - 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero. Zero is a legal compare immediate, so it has
    // already been filtered out above.
    assert(!C.isZero() && "C should not be zero here!");
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C = C - 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if (C.isMaxSignedValue())
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C = C + 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if (C.isAllOnes())
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C = C + 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  const uint64_t NewC = C.getZExtValue();
  if (isLegalCmpImmed(C))
    return {{NewC, P}};

  // Neither constant is a legal immediate. Still prefer the adjusted form if
  // it takes fewer instructions to materialize. The cost is computed at the
  // compare's own width so that the upper half of a 64-bit constant is taken
  // into account.
  auto NumberOfInstrToLoadImm = [Size](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, Size, Insn);
    return Insn.size();
  };

  if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(NewC))
    return {{NewC, P}};

  return std::nullopt;
}


/// Determine whether the RHS and predicate of a G_ICMP can be updated so that
/// the RHS will be selected as an arithmetic immediate.
///
/// \param MI              The G_ICMP instruction.
/// \param [out] MatchInfo The new RHS immediate and predicate on success.
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  const auto Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  const Register LHS = MI.getOperand(2).getReg();
  const Register RHS = MI.getOperand(3).getReg();

  auto Adjusted = tryAdjustICmpImmAndPred(LHS, RHS, Pred, MRI);
  if (!Adjusted)
    return false;

  MatchInfo = *Adjusted;
  return true;
}


/// Rewrite a G_ICMP in place to use the immediate and predicate in
/// \p MatchInfo.
///
/// A new G_CONSTANT is built into a clone of the old RHS virtual register,
/// then the compare's RHS operand and predicate are updated, with \p Observer
/// notified around the in-place change.
void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineOperand &RHSOp = MI.getOperand(3);

  // Materialize the new immediate with the same register attributes as the
  // old RHS.
  const Register NewRHS = MRI.cloneVirtualRegister(RHSOp.getReg());
  auto NewCst = MIB.buildConstant(NewRHS, MatchInfo.first);

  Observer.changingInstr(MI);
  RHSOp.setReg(NewCst.getReg(0));
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}


/// Match a splat G_SHUFFLE_VECTOR that can be lowered to a G_DUPLANE<N>.
///
/// The splat lane must lie within the first source, and the source and
/// destination types must be identical. Supported shapes:
///   2 elements: 64-bit or 32-bit scalars
///   4 elements: 32-bit or 16-bit scalars
///   8 elements: 16-bit or 8-bit scalars
///  16 elements: 8-bit scalars
///
/// \param MI              The G_SHUFFLE_VECTOR.
/// \param [out] MatchInfo On success, (G_DUPLANE opcode, lane index).
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  // Select the opcode from the (element count, scalar size) combination.
  const unsigned NumElts = SrcTy.getNumElements();
  const unsigned ScalarSize = SrcTy.getElementType().getSizeInBits();
  unsigned Opc = 0;
  if (ScalarSize == 64 && NumElts == 2)
    Opc = AArch64::G_DUPLANE64;
  else if (ScalarSize == 32 && (NumElts == 2 || NumElts == 4))
    Opc = AArch64::G_DUPLANE32;
  else if (ScalarSize == 16 && (NumElts == 4 || NumElts == 8))
    Opc = AArch64::G_DUPLANE16;
  else if (ScalarSize == 8 && (NumElts == 8 || NumElts == 16))
    Opc = AArch64::G_DUPLANE8;

  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}


/// Replace a G_SHUFFLE_VECTOR matched by matchDupLane with the selected
/// G_DUPLANE opcode.
///
/// \param MatchInfo (G_DUPLANE opcode, lane index). The lane index is emitted
///                  as a 64-bit constant.
void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register SrcReg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(SrcReg);

  B.setInstrAndDebugLoc(MI);
  auto LaneCst = B.buildConstant(LLT::integer(64), MatchInfo.second);

  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // The widening is done by concatenating the source with an undef vector.
  Register DupSrc = SrcReg;
  if (SrcTy.getSizeInBits() == 64) {
    auto UndefHalf = B.buildUndef(SrcTy);
    auto Widened = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                        {SrcReg, UndefHalf.getReg(0)});
    DupSrc = Widened.getReg(0);
  }

  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
               {DupSrc, LaneCst});
  MI.eraseFromParent();
}


/// \returns true if the unmerge \p MI has a 64- or 128-bit fixed-length vector
/// source and either produces one result per source element, or splits a
/// 128-bit source into two results of which the first is 64 bits wide.
bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  // The source is the last operand of the unmerge.
  Register Vec = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT VecTy = MRI.getType(Vec);

  if (VecTy.getSizeInBits() != 128 && VecTy.getSizeInBits() != 64)
    return false;
  if (!VecTy.isVector() || VecTy.isScalable())
    return false;

  // Fully scalarizing unmerge: one def per element plus the source operand.
  if (Unmerge.getNumOperands() == (unsigned)VecTy.getNumElements() + 1)
    return true;

  // Otherwise accept only a 128-bit source split into two halves.
  return Unmerge.getNumDefs() == 2 && VecTy.getSizeInBits() == 128 &&
         MRI.getType(Unmerge.getReg(0)).getSizeInBits() == 64;
}


/// Replace the unmerge \p MI with per-result extracts from its vector source.
///
/// Vector results are produced with subvector extracts (lower half, then upper
/// half); scalar results with constant-index element extracts. Results without
/// non-debug uses get no extract at all.
void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Vec = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT VecTy = MRI.getType(Vec);
  const LLT PieceTy = MRI.getType(Unmerge.getReg(0));
  assert((VecTy.isVector() && !VecTy.isScalable()) &&
         "Expected a fixed length vector");

  auto HasUses = [&MRI](Register R) { return !MRI.use_nodbg_empty(R); };

  if (!PieceTy.isVector()) {
    for (int Lane = 0; Lane < VecTy.getNumElements(); ++Lane) {
      Register Def = Unmerge.getReg(Lane);
      if (HasUses(Def))
        B.buildExtractVectorElementConstant(Def, Vec, Lane);
    }
  } else {
    assert(Unmerge.getNumDefs() == 2);
    Register Lo = Unmerge.getReg(0);
    if (HasUses(Lo))
      B.buildExtractSubvector(Lo, Vec, 0);
    Register Hi = Unmerge.getReg(1);
    if (HasUses(Hi))
      B.buildExtractSubvector(Hi, Vec, VecTy.getNumElements() / 2);
  }

  MI.eraseFromParent();
}


/// Match a G_BUILD_VECTOR whose non-undef sources are all the same register.
///
/// \param [out] Src the common source register; written only when the scan
/// over the sources completes.
/// \returns true if a common register was found and it is used by at least two
/// sources.
bool matchBuildVectorToDup(MachineInstr &MI, Register &Src,
                           MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // All-zeros and all-ones vectors are left as G_BUILD_VECTOR: selection later
  // tries imported patterns using immAllZerosV / immAllOnesV, which require
  // that opcode.
  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
    return false;

  Register Common = 0;
  unsigned LiveLanes = 0;
  for (const MachineOperand &Lane : drop_begin(MI.operands())) {
    // Undef lanes are compatible with any splat value.
    if (getOpcodeDef<GImplicitDef>(Lane.getReg(), MRI))
      continue;

    if (Common) {
      // A second distinct register means this is not a splat.
      if (Lane.getReg() != Common)
        return false;
    } else {
      Common = Lane.getReg();
    }
    ++LiveLanes;
  }

  Src = Common;
  // Reject all-undef vectors and vectors with a single non-undef lane.
  return Common && LiveLanes > 1;
}


/// Replace the instruction \p MI with a G_DUP of \p Src that defines the same
/// result register.
void applyBuildVectorToDup(MachineInstr &MI, Register Src,
                           MachineRegisterInfo &MRI, MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  B.buildInstr(AArch64::G_DUP, {Dst}, {Src});
  MI.eraseFromParent();
}


/// \returns how many instructions would be saved by folding the shift and/or
/// extension that defines \p CmpOp into a G_ICMP (0, 1 or 2).
static unsigned getCmpOperandFoldingProfit(Register CmpOp,
                                           MachineRegisterInfo &MRI) {
  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  // An extend is a G_SEXT_INREG, or a G_AND with a constant mask of 0xFF,
  // 0xFFFF or 0xFFFFFFFF.
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto MaskCst =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!MaskCst)
      return false;
    uint64_t Mask = MaskCst->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  // Nothing is saved unless the operand has exactly one non-debug use.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  MachineInstr *DefMI = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*DefMI))
    return 1;

  // From here on only shifts by a constant amount can be profitable.
  const unsigned DefOpc = DefMI->getOpcode();
  if (DefOpc != TargetOpcode::G_SHL && DefOpc != TargetOpcode::G_LSHR &&
      DefOpc != TargetOpcode::G_ASHR)
    return 0;

  auto AmtCst =
      getIConstantVRegValWithLookThrough(DefMI->getOperand(2).getReg(), MRI);
  if (!AmtCst)
    return 0;
  const uint64_t Amt = AmtCst->Value.getZExtValue();

  // A shift of an extend: both are counted as saved when the amount is at
  // most 4, otherwise only one.
  MachineInstr *ShiftedDef =
      getDefIgnoringCopies(DefMI->getOperand(1).getReg(), MRI);
  if (IsSupportedExtend(*ShiftedDef))
    return (Amt <= 4) ? 2 : 1;

  // A plain shift only counts for scalar 32/64-bit values with an in-range
  // shift amount.
  const LLT ShiftTy = MRI.getType(DefMI->getOperand(0).getReg());
  if (ShiftTy.isVector())
    return 0;
  const unsigned Width = ShiftTy.getSizeInBits();
  const bool InRange =
      (Width == 32 && Amt <= 31) || (Width == 64 && Amt <= 63);
  return InRange ? 1 : 0;
}


/// \returns true if swapping the LHS and RHS of the G_ICMP \p MI would expose
/// a more profitable folding opportunity (e.g. a shift + extend).
///
/// For example:
///    lsl     w13, w11, #1
///    cmp     w13, w12
/// can be turned into:
///    cmp     w12, w11, lsl #1
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  // A constant RHS that is a legal compare immediate can already be folded, so
  // leave such compares alone.
  Register CmpRHS = MI.getOperand(3).getReg();
  if (auto RHSImm = getIConstantVRegValWithLookThrough(CmpRHS, MRI))
    if (isLegalCmpImmed(RHSImm->Value))
      return false;

  Register CmpLHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  // When isCMN accepts an operand's definition, profit is measured on that
  // definition's operand 2 instead of on the operand itself.
  auto ProfitReg = [&](Register R) {
    MachineInstr *DefMI = getDefIgnoringCopies(R, MRI);
    if (isCMN(DefMI, Pred, MRI))
      return DefMI->getOperand(2).getReg();
    return R;
  };

  // Swap only when the current LHS would fold strictly more instructions than
  // the current RHS.
  const unsigned LHSProfit = getCmpOperandFoldingProfit(ProfitReg(CmpLHS), MRI);
  const unsigned RHSProfit = getCmpOperandFoldingProfit(ProfitReg(CmpRHS), MRI);
  return LHSProfit > RHSProfit;
}


/// Swap the LHS and RHS of the compare \p MI in place and replace its
/// predicate with the swapped predicate, so the comparison keeps its meaning.
///
/// \param Observer notified before and after the in-place mutation.
void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  // Announce the mutation *before* touching the instruction; changedInstr is
  // only for the "after" notification.
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}


/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC and yields the register holding its result.
/// \param [in] NoNans - True if the instruction has nnan flag.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
              MachineRegisterInfo &MRI) {
  // The result has the operands' element count and integer elements of the
  // operands' scalar width.
  const LLT SrcTy = MRI.getType(LHS);
  const LLT DstTy = LLT::fixed_vector(
      SrcTy.getNumElements(), LLT::integer(SrcTy.getScalarSizeInBits()));
  assert(DstTy.isVector() && "Expected vector types only?");

  // Builder factory for the plain "Opc Op0, Op1" compares.
  auto MakeCompare = [DstTy](unsigned Opc, Register Op0, Register Op1) {
    return [Opc, Op0, Op1, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(Opc, {DstTy}, {Op0, Op1}).getReg(0);
    };
  };

  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    // Not-equal is an equality compare followed by a bitwise not.
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      auto Equal = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, Equal).getReg(0);
    };
  case AArch64CC::EQ:
    return MakeCompare(AArch64::G_FCMEQ, LHS, RHS);
  case AArch64CC::GE:
    return MakeCompare(AArch64::G_FCMGE, LHS, RHS);
  case AArch64CC::GT:
    return MakeCompare(AArch64::G_FCMGT, LHS, RHS);
  // LS and MI reuse GE / GT with the operands swapped.
  case AArch64CC::LS:
    return MakeCompare(AArch64::G_FCMGE, RHS, LHS);
  case AArch64CC::MI:
    return MakeCompare(AArch64::G_FCMGT, RHS, LHS);
  }
}


/// \returns true if the G_FCMP \p MI can be lowered to AArch64 vector compare
/// pseudos: the result must be a vector, the subtarget must have NEON, and the
/// operand elements must be 16 (with full FP16 only), 32 or 64 bits wide.
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &Subtarget = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
  if (!ResTy.isVector() || !Subtarget.hasNEON())
    return false;

  const unsigned EltBits =
      MRI.getType(MI.getOperand(2).getReg()).getScalarSizeInBits();
  switch (EltBits) {
  case 16:
    return Subtarget.hasFullFP16();
  case 32:
  case 64:
    return true;
  default:
    return false;
  }
}


/// Lower the vector G_FCMP \p MI into AArch64-specific compare pseudos, then
/// replace all uses of its result and erase it.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);

  const auto &FCmp = cast<GFCmp>(MI);
  Register DstReg = FCmp.getReg(0);
  const CmpInst::Predicate Pred = FCmp.getCond();
  Register LHS = FCmp.getLHSReg();
  Register RHS = FCmp.getRHSReg();
  LLT ResTy = MRI.getType(DstReg);

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  const bool IsOrdOrUno = Pred == CmpInst::Predicate::FCMP_ORD ||
                          Pred == CmpInst::Predicate::FCMP_UNO;
  if (IsOrdOrUno && isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
    // "fcmp ord %a, 0" is the canonical "LHS is not NaN" check, equivalent to
    // a == a; "fcmp uno %a, 0" is the canonical "LHS is NaN" check, equivalent
    // to a != a. Either needs just one self-compare rather than the two
    // comparisons an ord/uno normally would.
    RHS = LHS;
    CC = (Pred == CmpInst::Predicate::FCMP_UNO) ? AArch64CC::NE : AArch64CC::EQ;
  } else {
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
  }

  MIB.setInstrAndDebugLoc(MI);

  // TODO: Also consider GISelValueTracking result if eligible.
  const bool NoNans = MI.getFlag(MachineInstr::FmNoNans);

  auto BuildFirst = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
  Register Result;
  if (CC2 != AArch64CC::AL) {
    // Two condition codes: emit the second compare, then the first, and OR
    // the two results together.
    auto BuildSecond = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
    auto SecondRes = BuildSecond(MIB);
    auto FirstRes = BuildFirst(MIB);
    Result = MIB.buildOr(ResTy, FirstRes, SecondRes).getReg(0);
  } else {
    Result = BuildFirst(MIB);
  }

  if (Invert)
    Result = MIB.buildNot(ResTy, Result).getReg(0);

  MRI.replaceRegWith(DstReg, Result);
  MI.eraseFromParent();
}


// Matches a G_BUILD_VECTOR where at least one source operand is not a
// constant; a build vector made only of constants is rejected.
bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto *BuildVec = cast<GBuildVector>(&MI);

  const unsigned NumSrcs = BuildVec->getNumSources();
  for (unsigned Idx = 0; Idx != NumSrcs; ++Idx) {
    // The first non-constant source is enough to match.
    if (!getAnyConstantVRegValWithLookThrough(BuildVec->getSourceReg(Idx), MRI)
             .has_value())
      return true;
  }

  return false;
}


/// Expand the G_BUILD_VECTOR \p MI into a chain of vector element inserts that
/// starts from an undef vector, then copy the final vector into the original
/// result register. Undef sources are skipped.
void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B) {
  auto *BuildVec = cast<GBuildVector>(&MI);
  Register FinalDst = BuildVec->getReg(0);
  const LLT VecTy = MRI.getType(FinalDst);

  // Running value of the partially built vector.
  Register Acc = B.buildUndef(VecTy).getReg(0);

  for (unsigned Lane = 0, NumSrcs = BuildVec->getNumSources(); Lane != NumSrcs;
       ++Lane) {
    Register Elt = BuildVec->getSourceReg(Lane);
    // The lane is already undef in the accumulator; no insert needed.
    if (mi_match(Elt, MRI, m_GImplicitDef()))
      continue;
    auto LaneCst = B.buildConstant(LLT::integer(64), Lane);
    auto Insert = B.buildInsertVectorElement(VecTy, Acc, Elt, LaneCst);
    Acc = Insert.getReg(0);
  }

  B.buildCopy(FinalDst, Acc);
  BuildVec->eraseFromParent();
}


/// Match a scalar G_STORE whose stored value is defined by a G_TRUNC of a
/// value no wider than 64 bits.
///
/// \param [out] SrcReg bound to the G_TRUNC's source register by the pattern
/// match.
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register StoredVal = MI.getOperand(0).getReg();

  // Vector stores are left alone.
  if (MRI.getType(StoredVal).isVector())
    return false;

  // Match a store of a truncate, and only form truncstores for value types of
  // max 64b.
  const bool IsTruncOfReg = mi_match(StoredVal, MRI, m_GTrunc(m_Reg(SrcReg)));
  return IsTruncOfReg && MRI.getType(SrcReg).getSizeInBits() <= 64;
}


/// Rewrite the G_STORE \p MI in place so that it stores \p SrcReg, notifying
/// \p Observer around the mutation.
void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  MachineOperand &StoredVal = MI.getOperand(0);
  Observer.changingInstr(MI);
  StoredVal.setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Vector G_SEXT_INREGs were allowed to form earlier for the sake of combine
// opportunities; whichever remain at this stage must be lowered back for
// selection. This matches any G_SEXT_INREG whose result is a vector.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  return MRI.getType(MI.getOperand(0).getReg()).isVector();
}


/// Lower the G_SEXT_INREG \p MI by delegating to LegalizerHelper::lower.
void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  auto &MF = *MI.getMF();
  LegalizerHelper Lowering(MF, Observer, B);
  // The type argument is an unused hint here, so pass a default LLT.
  Lowering.lower(MI, /*TypeIdx=*/0, LLT());
}


/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///           => unused, <N x t> = unmerge v
///
/// \param [out] MatchInfo set to the first source of the G_EXT (v above) on a
/// successful match.
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);

  // Only two-result unmerges whose second result has no non-debug uses.
  if (Unmerge.getNumDefs() != 2 || !MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  const LLT LoTy = MRI.getType(Unmerge.getReg(0));
  if (!LoTy.isVector())
    return false;

  MachineInstr *ExtMI =
      getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!ExtMI)
    return false;

  // The G_EXT's constant operand must equal the size in bytes of one unmerge
  // result.
  auto ExtImm =
      getIConstantVRegValWithLookThrough(ExtMI->getOperand(3).getReg(), MRI);
  if (!ExtImm || ExtImm->Value.getZExtValue() != LoTy.getSizeInBytes())
    return false;

  // The second G_EXT source has to be undef.
  if (!getOpcodeDef<GImplicitDef>(ExtMI->getOperand(2).getReg(), MRI))
    return false;

  MatchInfo = ExtMI->getOperand(1).getReg();
  return true;
}


/// Rewrite the unmerge \p MI in place: exchange its two result registers and
/// make it read from \p SrcReg, notifying \p Observer around the mutation.
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Read both destinations up front, then write them back exchanged.
  Register OldDst0 = MI.getOperand(0).getReg();
  Register OldDst1 = MI.getOperand(1).getReg();
  MI.getOperand(0).setReg(OldDst1);
  MI.getOperand(1).setReg(OldDst0);
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Match instructions whose result type is a fixed vector of two 64-bit
// elements (v2s64).
bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
  const LLT V2S64 = LLT::fixed_vector(2, 64);
  return MRI.getType(MI.getOperand(0).getReg()) == V2S64;
}


/// Split the v2s64 G_MUL \p MI with LegalizerHelper::fewerElementsVector,
/// using a narrow type that has half the element count of the result type.
void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
                   MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  const LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
  assert(ResTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");

  const LLT HalfTy = ResTy.changeElementCount(
      ResTy.getElementCount().divideCoefficientBy(2));
  LegalizerHelper Splitter(*MI.getMF(), Observer, B);
  Splitter.fewerElementsVector(MI, /*TypeIdx=*/0, HalfTy);
}


/// Combiner that applies the post-legalizer lowering rules. The rule matching
/// members are pulled in from the TableGen-generated
/// AArch64GenPostLegalizeGILowering.inc.
class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
  const CombinerHelper Helper;
  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerLoweringImpl(
      MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI);

  /// \returns the name of this combiner. It matches the name reported by the
  /// legacy pass instead of the copy-pasted pre-legalizer combiner name it
  /// used to return.
  static const char *getName() { return "AArch64PostLegalizerLowering"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};


#define GET_GICOMBINER_IMPL

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_IMPL


/// Construct the lowering combiner.
///
/// Forwards \p MF, \p CInfo and \p CSEInfo to the Combiner base (with a null
/// VT argument), builds the CombinerHelper, stores \p RuleConfig and \p STI,
/// and takes the remaining member initializers from the TableGen-generated
/// .inc file included inside the initializer list.
///
/// NOTE(review): the helper is created with IsPreLegalize = true although this
/// pass runs after legalization -- presumably intentional, confirm against
/// CombinerHelper.
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(

    MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,

    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,

    const AArch64Subtarget &STI)

    : Combiner(MF, CInfo, /*VT*/ nullptr, CSEInfo),

      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),

      STI(STI),

#define GET_GICOMBINER_CONSTRUCTOR_INITS

#include "AArch64GenPostLegalizeGILowering.inc"

#undef GET_GICOMBINER_CONSTRUCTOR_INITS

{

}


/// Run the post-legalizer lowering combiner over \p MF with the rule set in
/// \p RuleConfig.
///
/// \returns false without doing anything if instruction selection has already
/// failed for \p MF; otherwise the result of combineMachineInstrs().
bool runPostLegalizerLowering(
    MachineFunction &MF,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig) {
  if (MF.getProperties().hasFailedISel())
    return false;

  const Function &Fn = MF.getFunction();
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CombinerInfo Info(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
                    /*LegalizerInfo=*/nullptr, /*OptEnabled=*/true,
                    Fn.hasOptSize(), Fn.hasMinSize());
  // A single pass only: fixed-point iteration is disabled to reduce
  // compile-time.
  Info.MaxIterations = 1;
  Info.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
  Info.EnableFullDCE = false;

  AArch64PostLegalizerLoweringImpl Lowering(MF, Info, /*CSEInfo=*/nullptr,
                                            RuleConfig, Subtarget);
  return Lowering.combineMachineInstrs();
}


/// Legacy pass-manager MachineFunctionPass wrapper for the post-legalizer
/// lowering.
class AArch64PostLegalizerLoweringLegacy : public MachineFunctionPass {
  /// Rule configuration handed to the lowering on every run.
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;

public:
  static char ID;

  AArch64PostLegalizerLoweringLegacy();

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }
};

} // end anonymous namespace


/// Declare this pass's analysis usage: it preserves the CFG, adds the
/// SelectionDAG-fallback analysis usage, and then defers to the
/// MachineFunctionPass base implementation.
void AArch64PostLegalizerLoweringLegacy::getAnalysisUsage(

    AnalysisUsage &AU) const {

  AU.setPreservesCFG();

  getSelectionDAGFallbackAnalysisUsage(AU);

  // Overrides of getAnalysisUsage must chain to the base class.
  MachineFunctionPass::getAnalysisUsage(AU);

}


/// Construct the legacy pass and parse the rule configuration from the command
/// line; an unparsable rule option is a fatal error.
AArch64PostLegalizerLoweringLegacy::AArch64PostLegalizerLoweringLegacy()
    : MachineFunctionPass(ID) {
  const bool ParsedOk = RuleConfig.parseCommandLineOption();
  if (ParsedOk)
    return;
  report_fatal_error("Invalid rule identifier");
}


/// Run the lowering on \p MF, which must already be legalized.
/// \returns the result of runPostLegalizerLowering.
bool AArch64PostLegalizerLoweringLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
  const bool Changed = runPostLegalizerLowering(MF, RuleConfig);
  return Changed;
}


// Definition of the legacy pass's static ID member.
char AArch64PostLegalizerLoweringLegacy::ID = 0;

// Register the legacy pass under DEBUG_TYPE with its description string; both
// trailing macro arguments are passed as false.
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,

                      "Lower AArch64 MachineInstrs after legalization", false,

                      false)

INITIALIZE_PASS_END(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,

                    "Lower AArch64 MachineInstrs after legalization", false,

                    false)


/// Construct the new-pass-manager pass: allocate the rule configuration and
/// parse it from the command line, reporting a fatal usage error if that
/// fails.
AArch64PostLegalizerLoweringPass::AArch64PostLegalizerLoweringPass()
    : RuleConfig(
          std::make_unique<AArch64PostLegalizerLoweringImplRuleConfig>()) {
  const bool ParsedOk = RuleConfig->parseCommandLineOption();
  if (ParsedOk)
    return;
  reportFatalUsageError("invalid rule identifier");
}


// The move constructor and destructor are defaulted here, out of line.
// NOTE(review): presumably so that the rule-config type held through RuleConfig
// only needs to be complete in this file -- confirm against the class
// declaration in the header.
AArch64PostLegalizerLoweringPass::AArch64PostLegalizerLoweringPass(

    AArch64PostLegalizerLoweringPass &&) = default;


AArch64PostLegalizerLoweringPass::~AArch64PostLegalizerLoweringPass() = default;


/// New-pass-manager entry point: run the lowering on \p MF.
///
/// \returns PreservedAnalyses::all() when nothing changed; otherwise the
/// machine-function-pass preserved set with the CFG analyses also preserved.
PreservedAnalyses
AArch64PostLegalizerLoweringPass::run(MachineFunction &MF,
                                      MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);

  if (!runPostLegalizerLowering(MF, *RuleConfig))
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}


namespace llvm {


/// Factory for the legacy pass manager.
/// \returns a newly allocated AArch64PostLegalizerLoweringLegacy pass.
FunctionPass *createAArch64PostLegalizerLowering() {

  return new AArch64PostLegalizerLoweringLegacy();

}


} // end namespace llvm

AArch64BaseInfo.h

AArch64ExpandImm.h

AArch64GlobalISelUtils.h

isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition AArch64ISelLowering.cpp:17231

isINSMask
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
Definition AArch64ISelLowering.cpp:14698

getCmpOperandFoldingProfit
static unsigned getCmpOperandFoldingProfit(SDValue Op, bool AllowExtend)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
Definition AArch64ISelLowering.cpp:4208

shouldBeAdjustedToZero
static bool shouldBeAdjustedToZero(SDValue LHS, const APInt &C, ISD::CondCode &CC)
Definition AArch64ISelLowering.cpp:4232

isLegalCmpImmed
bool isLegalCmpImmed(const APInt &C)
Definition AArch64ISelLowering.cpp:3761

AArch64LegalizerInfo.h
This file declares the targeting of the Machinelegalizer class for AArch64.

AArch64MCTargetDesc.h

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

GET_GICOMBINER_CONSTRUCTOR_INITS
#define GET_GICOMBINER_CONSTRUCTOR_INITS

AArch64PerfectShuffle.h

AArch64Subtarget.h

AArch64.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Utils.h

CombinerHelper.h
This contains common combine transformations that may be used in a combine pass, or by the target else...

CombinerInfo.h
Option class for Targets to specify which operations are combined how and when.

Combiner.h
This contains the base class for all Combiners generated by TableGen.

GIMatchTableExecutorImpl.h

GISelChangeObserver.h
This contains common code to allow clients to notify changes to machine instr.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

_
#define _
Definition HexagonMCCodeEmitter.cpp:46

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

InstrTypes.h

LegalizerHelper.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineFrameInfo.h

MachineFunctionAnalysisManager.h

MachineFunctionPass.h

MachineIRBuilder.h
This file declares the MachineIRBuilder class.

MachineInstrBuilder.h

MachinePassManager.h

MachineRegisterInfo.h

Reg
Register Reg
Definition MachineSink.cpp:2126

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

getReg
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition MipsDisassembler.cpp:106

P
#define P(N)

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

getName
static StringRef getName(Value *V)
Definition ProvenanceAnalysisEvaluator.cpp:20

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:77

TargetOpcodes.h

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:75

llvm::AArch64PostLegalizerLoweringPass
Definition AArch64.h:116

llvm::AArch64PostLegalizerLoweringPass::run
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition AArch64PostLegalizerLowering.cpp:1400

llvm::AArch64PostLegalizerLoweringPass::~AArch64PostLegalizerLoweringPass
~AArch64PostLegalizerLoweringPass()

llvm::AArch64PostLegalizerLoweringPass::AArch64PostLegalizerLoweringPass
AArch64PostLegalizerLoweringPass()
Definition AArch64PostLegalizerLowering.cpp:1387

llvm::AArch64Subtarget
Definition AArch64Subtarget.h:38

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563

llvm::APInt::logBase2
unsigned logBase2() const
Definition APInt.h:1784

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740

llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769

llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770

llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763

llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765

llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:749

llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768

llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766

llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750

llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890

llvm::CombinerHelper
Definition CombinerHelper.h:115

llvm::Combiner
Combiner implementation.
Definition Combiner.h:33

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:65

llvm::GISelCSEInfo
The CSE Analysis object.
Definition CSEInfo.h:72

llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition GISelChangeObserver.h:30

llvm::GISelChangeObserver::changingInstr
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.

llvm::GISelChangeObserver::changedInstr
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.

llvm::LLT
Definition LowLevelType.h:45

llvm::LLT::changeElementCount
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition LowLevelType.h:453

llvm::LLT::isScalableVector
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition LowLevelType.h:378

llvm::LLT::getScalarSizeInBits
constexpr unsigned getScalarSizeInBits() const
Definition LowLevelType.h:497

llvm::LLT::getNumElements
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition LowLevelType.h:350

llvm::LLT::isVector
constexpr bool isVector() const
Definition LowLevelType.h:289

llvm::LLT::pointer
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition LowLevelType.h:115

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition LowLevelType.h:387

llvm::LLT::getElementCount
constexpr ElementCount getElementCount() const
Definition LowLevelType.h:380

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition LowLevelType.h:203

llvm::LLT::integer
static LLT integer(unsigned SizeInBits)
Definition LowLevelType.h:92

llvm::LLT::getSizeInBytes
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition LowLevelType.h:397

llvm::LLT::getElementType
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition LowLevelType.h:510

llvm::LegalizerHelper
Definition LegalizerHelper.h:49

llvm::LegalizerHelper::lower
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
Definition LegalizerHelper.cpp:4573

llvm::LegalizerHelper::fewerElementsVector
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
Definition LegalizerHelper.cpp:5591

llvm::MFPropsModifier
An RAII based helper class to modify MachineFunctionProperties when running pass.
Definition MachinePassManager.h:40

llvm::MachineFrameInfo::CreateStackObject
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition MachineFrameInfo.cpp:51

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:188

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:788

llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition MachineFunction.h:804

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:749

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition MachineFunction.h:877

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition MachineIRBuilder.h:237

llvm::MachineIRBuilder::buildNot
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
Definition MachineIRBuilder.h:2041

llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition MachineIRBuilder.h:423

llvm::MachineIRBuilder::setInstrAndDebugLoc
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Definition MachineIRBuilder.h:379

llvm::MachineIRBuilder::getMRI
MachineRegisterInfo * getMRI()
Getter for MRI.
Definition MachineIRBuilder.h:311

llvm::MachineIRBuilder::buildOr
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
Definition MachineIRBuilder.h:2026

llvm::MachineIRBuilder::buildCopy
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Definition MachineIRBuilder.cpp:332

llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition MachineIRBuilder.cpp:337

llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition MachineInstrBuilder.h:196

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineInstr::FmNoNans
@ FmNoNans
Definition MachineInstr.h:96

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:609

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::MachineRegisterInfo::hasOneNonDBGUse
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition MachineRegisterInfo.cpp:425

llvm::MachineRegisterInfo::getVRegDef
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition MachineRegisterInfo.cpp:404

llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition MachineRegisterInfo.h:569

llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition MachineRegisterInfo.h:771

llvm::MachineRegisterInfo::cloneVirtualRegister
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
Definition MachineRegisterInfo.cpp:176

llvm::MachineRegisterInfo::replaceRegWith
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition MachineRegisterInfo.cpp:386

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252

uint64_t

Changed
Changed
Definition ObjCARCOpts.cpp:2366

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:245

llvm::AArch64CC::CondCode
CondCode
Definition AArch64BaseInfo.h:288

llvm::AArch64CC::NE
@ NE
Definition AArch64BaseInfo.h:290

llvm::AArch64CC::GE
@ GE
Definition AArch64BaseInfo.h:299

llvm::AArch64CC::EQ
@ EQ
Definition AArch64BaseInfo.h:289

llvm::AArch64CC::MI
@ MI
Definition AArch64BaseInfo.h:293

llvm::AArch64CC::GT
@ GT
Definition AArch64BaseInfo.h:301

llvm::AArch64CC::AL
@ AL
Definition AArch64BaseInfo.h:303

llvm::AArch64CC::LS
@ LS
Definition AArch64BaseInfo.h:298

llvm::AArch64GISelUtils
Definition AArch64GlobalISelUtils.h:24

llvm::AArch64GISelUtils::isLegalArithImmed
constexpr bool isLegalArithImmed(const uint64_t C)
Definition AArch64GlobalISelUtils.h:28

llvm::AArch64GISelUtils::changeVectorFCMPPredToAArch64CC
void changeVectorFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
Find the AArch64 condition codes necessary to represent P for a vector floating point comparison.
Definition AArch64GlobalISelUtils.cpp:189

llvm::AArch64GISelUtils::isCMN
bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred, const MachineRegisterInfo &MRI)
Definition AArch64GlobalISelUtils.cpp:41

llvm::AArch64GISelUtils::getAArch64VectorSplatScalar
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition AArch64GlobalISelUtils.cpp:33

llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition AArch64ExpandImm.cpp:598

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::MIPatternMatch
Definition MIPatternMatch.h:25

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition MIPatternMatch.h:311

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::MIPatternMatch::m_GImplicitDef
ImplicitDefMatch m_GImplicitDef()
Definition MIPatternMatch.h:472

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition MIPatternMatch.h:28

llvm::MIPatternMatch::m_GTrunc
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
Definition MIPatternMatch.h:716

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315

llvm::Offset
@ Offset
Definition DWP.cpp:558

llvm::isBuildVectorAllZeros
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1444

llvm::getOpcodeDef
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653

llvm::isZIPMask
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
Definition AArch64PerfectShuffle.h:6629

llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition MachineInstrBuilder.h:65

llvm::AlignStyle::Right
@ Right
Definition FormatCommon.h:17

llvm::AlignStyle::Left
@ Left
Definition FormatCommon.h:17

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284

llvm::MachineFunctionAnalysisManager
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Definition MachineFunctionAnalysisManager.h:24

llvm::getDefIgnoringCopies
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494

llvm::getMachineFunctionPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
Definition MachinePassManager.cpp:162

llvm::createAArch64PostLegalizerLowering
FunctionPass * createAArch64PostLegalizerLowering()
Definition AArch64PostLegalizerLowering.cpp:1414

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163

llvm::isUZPMask
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
Definition AArch64PerfectShuffle.h:6678

llvm::isREVMask
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
Definition AArch64PerfectShuffle.h:6756

llvm::getAnyConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:439

llvm::isBuildVectorAllOnes
LLVM_ABI bool isBuildVectorAllOnes(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1450

llvm::getSelectionDAGFallbackAnalysisUsage
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:25

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::getIConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771

llvm::isTRNMask
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
Definition AArch64PerfectShuffle.h:6707

llvm::getSplatIndex
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition VectorUtils.cpp:368

llvm::reportFatalUsageError
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177

std
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863

true
Definition SPIRVConvergenceRegionAnalysis.cpp:41

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::CombinerInfo
Definition CombinerInfo.h:24

llvm::CombinerInfo::ObserverLevel::SinglePass
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...
Definition CombinerInfo.h:71

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314

llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition MachineMemOperand.h:42

llvm::MachinePointerInfo::getFixedStack
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition MachineOperand.cpp:1150