17#include "llvm/IR/IntrinsicsAMDGPU.h"
18#include "llvm/IR/IntrinsicsR600.h"
22#define DEBUG_TYPE "amdgpu-attributor"
27 "amdgpu-indirect-call-specialization-threshold",
29 "A threshold controls whether an indirect call will be specialized"),
32#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
35#include "AMDGPUAttributes.def"
39#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
43#include "AMDGPUAttributes.def"
48#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
49static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
51#include "AMDGPUAttributes.def"
61 bool HasApertureRegs,
bool SupportsGetDoorBellID,
62 unsigned CodeObjectVersion) {
64 case Intrinsic::amdgcn_workitem_id_x:
67 case Intrinsic::amdgcn_workgroup_id_x:
69 return WORKGROUP_ID_X;
70 case Intrinsic::amdgcn_workitem_id_y:
71 case Intrinsic::r600_read_tidig_y:
73 case Intrinsic::amdgcn_workitem_id_z:
74 case Intrinsic::r600_read_tidig_z:
76 case Intrinsic::amdgcn_workgroup_id_y:
77 case Intrinsic::r600_read_tgid_y:
78 return WORKGROUP_ID_Y;
79 case Intrinsic::amdgcn_workgroup_id_z:
80 case Intrinsic::r600_read_tgid_z:
81 return WORKGROUP_ID_Z;
82 case Intrinsic::amdgcn_cluster_id_x:
85 case Intrinsic::amdgcn_cluster_id_y:
87 case Intrinsic::amdgcn_cluster_id_z:
89 case Intrinsic::amdgcn_lds_kernel_id:
91 case Intrinsic::amdgcn_dispatch_ptr:
93 case Intrinsic::amdgcn_dispatch_id:
95 case Intrinsic::amdgcn_implicitarg_ptr:
96 return IMPLICIT_ARG_PTR;
99 case Intrinsic::amdgcn_queue_ptr:
102 case Intrinsic::amdgcn_is_shared:
103 case Intrinsic::amdgcn_is_private:
111 case Intrinsic::trap:
112 case Intrinsic::debugtrap:
113 case Intrinsic::ubsantrap:
114 if (SupportsGetDoorBellID)
138 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
139 F.hasFnAttribute(Attribute::SanitizeThread) ||
140 F.hasFnAttribute(Attribute::SanitizeMemory) ||
141 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
142 F.hasFnAttribute(Attribute::SanitizeMemTag);
148 AMDGPUInformationCache(
const Module &M, AnalysisGetter &AG,
150 SetVector<Function *> *
CGSCC, TargetMachine &TM)
156 enum ConstantStatus : uint8_t {
159 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
160 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
161 ADDR_SPACE_CAST_BOTH_TO_FLAT =
162 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
166 bool hasApertureRegs(Function &
F) {
167 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
168 return ST.hasApertureRegs();
172 bool supportsGetDoorbellID(Function &
F) {
173 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
174 return ST.supportsGetDoorbellID();
177 std::optional<std::pair<unsigned, unsigned>>
178 getFlatWorkGroupSizeAttr(
const Function &
F)
const {
182 return std::make_pair(
R->first, *(
R->second));
185 std::pair<unsigned, unsigned>
186 getDefaultFlatWorkGroupSize(
const Function &
F)
const {
187 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
188 return ST.getDefaultFlatWorkGroupSize(
F.getCallingConv());
191 std::pair<unsigned, unsigned>
192 getMaximumFlatWorkGroupRange(
const Function &
F) {
193 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
194 return {
ST.getMinFlatWorkGroupSize(),
ST.getMaxFlatWorkGroupSize()};
197 SmallVector<unsigned> getMaxNumWorkGroups(
const Function &
F) {
198 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
199 return ST.getMaxNumWorkGroups(
F);
203 unsigned getCodeObjectVersion()
const {
return CodeObjectVersion; }
205 std::optional<std::pair<unsigned, unsigned>>
206 getWavesPerEUAttr(
const Function &
F) {
212 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
213 Val->second =
ST.getMaxWavesPerEU();
215 return std::make_pair(Val->first, *(Val->second));
219 const GCNSubtarget &
ST = TM.getSubtarget<GCNSubtarget>(
F);
220 return ST.getMaxWavesPerEU();
223 unsigned getMaxAddrSpace()
const override {
230 static uint8_t visitConstExpr(
const ConstantExpr *CE) {
231 uint8_t Status = NONE;
233 if (
CE->getOpcode() == Instruction::AddrSpaceCast) {
234 unsigned SrcAS =
CE->getOperand(0)->getType()->getPointerAddressSpace();
236 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
238 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
245 uint8_t getConstantAccess(
const Constant *
C,
246 SmallPtrSetImpl<const Constant *> &Visited) {
247 auto It = ConstantStatus.find(
C);
248 if (It != ConstantStatus.end())
256 Result |= visitConstExpr(CE);
258 for (
const Use &U :
C->operands()) {
260 if (!OpC || !Visited.
insert(OpC).second)
263 Result |= getConstantAccess(OpC, Visited);
270 bool needsQueuePtr(
const Constant *
C, Function &Fn) {
272 bool HasAperture = hasApertureRegs(Fn);
275 if (!IsNonEntryFunc && HasAperture)
278 SmallPtrSet<const Constant *, 8> Visited;
279 uint8_t
Access = getConstantAccess(
C, Visited);
282 if (IsNonEntryFunc && (
Access & DS_GLOBAL))
285 return !HasAperture && (
Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
288 bool checkConstForAddrSpaceCastFromPrivate(
const Constant *
C) {
289 SmallPtrSet<const Constant *, 8> Visited;
290 uint8_t
Access = getConstantAccess(
C, Visited);
291 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
296 DenseMap<const Constant *, uint8_t> ConstantStatus;
297 const unsigned CodeObjectVersion;
300struct AAAMDAttributes
301 :
public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
303 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
306 AAAMDAttributes(
const IRPosition &IRP, Attributor &
A) : Base(IRP) {}
309 static AAAMDAttributes &createForPosition(
const IRPosition &IRP,
313 StringRef
getName()
const override {
return "AAAMDAttributes"; }
316 const char *getIdAddr()
const override {
return &ID; }
320 static bool classof(
const AbstractAttribute *AA) {
325 static const char ID;
327const char AAAMDAttributes::ID = 0;
329struct AAUniformWorkGroupSize
330 :
public StateWrapper<BooleanState, AbstractAttribute> {
331 using Base = StateWrapper<BooleanState, AbstractAttribute>;
332 AAUniformWorkGroupSize(
const IRPosition &IRP, Attributor &
A) : Base(IRP) {}
335 static AAUniformWorkGroupSize &createForPosition(
const IRPosition &IRP,
339 StringRef
getName()
const override {
return "AAUniformWorkGroupSize"; }
342 const char *getIdAddr()
const override {
return &ID; }
346 static bool classof(
const AbstractAttribute *AA) {
351 static const char ID;
353const char AAUniformWorkGroupSize::ID = 0;
355struct AAUniformWorkGroupSizeFunction :
public AAUniformWorkGroupSize {
356 AAUniformWorkGroupSizeFunction(
const IRPosition &IRP, Attributor &
A)
357 : AAUniformWorkGroupSize(IRP,
A) {}
361 CallingConv::ID CC =
F->getCallingConv();
363 if (CC != CallingConv::AMDGPU_KERNEL)
366 bool InitialValue =
F->hasFnAttribute(
"uniform-work-group-size");
369 indicateOptimisticFixpoint();
371 indicatePessimisticFixpoint();
377 auto CheckCallSite = [&](AbstractCallSite CS) {
380 <<
"->" << getAssociatedFunction()->
getName() <<
"\n");
382 const auto *CallerInfo =
A.getAAFor<AAUniformWorkGroupSize>(
384 if (!CallerInfo || !CallerInfo->isValidState())
388 CallerInfo->getState());
393 bool AllCallSitesKnown =
true;
394 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
true, AllCallSitesKnown))
395 return indicatePessimisticFixpoint();
402 return ChangeStatus::UNCHANGED;
404 LLVMContext &Ctx = getAssociatedFunction()->getContext();
405 return A.manifestAttrs(getIRPosition(),
406 {Attribute::get(Ctx,
"uniform-work-group-size")},
410 bool isValidState()
const override {
415 const std::string getAsStr(Attributor *)
const override {
416 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) +
"]";
420 void trackStatistics()
const override {}
423AAUniformWorkGroupSize &
424AAUniformWorkGroupSize::createForPosition(
const IRPosition &IRP,
427 return *
new (
A.Allocator) AAUniformWorkGroupSizeFunction(IRP,
A);
429 "AAUniformWorkGroupSize is only valid for function position");
432struct AAAMDAttributesFunction :
public AAAMDAttributes {
433 AAAMDAttributesFunction(
const IRPosition &IRP, Attributor &
A)
434 : AAAMDAttributes(IRP,
A) {}
446 if (HasSanitizerAttrs) {
447 removeAssumedBits(IMPLICIT_ARG_PTR);
448 removeAssumedBits(HOSTCALL_PTR);
449 removeAssumedBits(FLAT_SCRATCH_INIT);
453 if (HasSanitizerAttrs &&
454 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
455 Attr.first == FLAT_SCRATCH_INIT))
458 if (
F->hasFnAttribute(Attr.second))
459 addKnownBits(Attr.first);
462 if (
F->isDeclaration())
468 indicatePessimisticFixpoint();
476 auto OrigAssumed = getAssumed();
479 const AACallEdges *AAEdges =
A.getAAFor<AACallEdges>(
480 *
this, this->getIRPosition(), DepClassTy::REQUIRED);
483 return indicatePessimisticFixpoint();
487 bool NeedsImplicit =
false;
488 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
489 bool HasApertureRegs = InfoCache.hasApertureRegs(*
F);
490 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*
F);
491 unsigned COV = InfoCache.getCodeObjectVersion();
496 const AAAMDAttributes *AAAMD =
A.getAAFor<AAAMDAttributes>(
498 if (!AAAMD || !AAAMD->isValidState())
499 return indicatePessimisticFixpoint();
504 bool NonKernelOnly =
false;
507 HasApertureRegs, SupportsGetDoorbellID, COV);
518 if (!
Callee->hasFnAttribute(Attribute::NoCallback))
519 return indicatePessimisticFixpoint();
524 if ((IsNonEntryFunc || !NonKernelOnly))
525 removeAssumedBits(AttrMask);
531 removeAssumedBits(IMPLICIT_ARG_PTR);
533 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(
A)) {
537 removeAssumedBits(IMPLICIT_ARG_PTR);
539 removeAssumedBits(QUEUE_PTR);
542 if (funcRetrievesMultigridSyncArg(
A, COV)) {
543 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
544 "multigrid_sync_arg needs implicitarg_ptr");
545 removeAssumedBits(MULTIGRID_SYNC_ARG);
548 if (funcRetrievesHostcallPtr(
A, COV)) {
549 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
"hostcall needs implicitarg_ptr");
550 removeAssumedBits(HOSTCALL_PTR);
553 if (funcRetrievesHeapPtr(
A, COV)) {
554 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
"heap_ptr needs implicitarg_ptr");
555 removeAssumedBits(HEAP_PTR);
558 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(
A, COV)) {
559 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
"queue_ptr needs implicitarg_ptr");
560 removeAssumedBits(QUEUE_PTR);
563 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(
A)) {
564 removeAssumedBits(LDS_KERNEL_ID);
567 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(
A, COV))
568 removeAssumedBits(DEFAULT_QUEUE);
570 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(
A, COV))
571 removeAssumedBits(COMPLETION_ACTION);
573 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(
A))
574 removeAssumedBits(FLAT_SCRATCH_INIT);
576 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
577 : ChangeStatus::UNCHANGED;
582 LLVMContext &Ctx = getAssociatedFunction()->getContext();
585 if (isKnown(Attr.first))
586 AttrList.
push_back(Attribute::get(Ctx, Attr.second));
589 return A.manifestAttrs(getIRPosition(), AttrList,
593 const std::string getAsStr(Attributor *)
const override {
595 raw_string_ostream OS(Str);
598 if (isAssumed(Attr.first))
599 OS <<
' ' << Attr.second;
605 void trackStatistics()
const override {}
608 bool checkForQueuePtr(Attributor &
A) {
612 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
614 bool NeedsQueuePtr =
false;
617 unsigned SrcAS =
static_cast<AddrSpaceCastInst &
>(
I).getSrcAddressSpace();
619 NeedsQueuePtr =
true;
625 bool HasApertureRegs = InfoCache.hasApertureRegs(*
F);
631 if (!HasApertureRegs) {
632 bool UsedAssumedInformation =
false;
633 A.checkForAllInstructions(CheckAddrSpaceCasts, *
this,
634 {Instruction::AddrSpaceCast},
635 UsedAssumedInformation);
642 if (!IsNonEntryFunc && HasApertureRegs)
645 for (BasicBlock &BB : *
F) {
646 for (Instruction &
I : BB) {
647 for (
const Use &U :
I.operands()) {
649 if (InfoCache.needsQueuePtr(
C, *
F))
659 bool funcRetrievesMultigridSyncArg(Attributor &
A,
unsigned COV) {
661 AA::RangeTy
Range(Pos, 8);
662 return funcRetrievesImplicitKernelArg(
A,
Range);
665 bool funcRetrievesHostcallPtr(Attributor &
A,
unsigned COV) {
667 AA::RangeTy
Range(Pos, 8);
668 return funcRetrievesImplicitKernelArg(
A,
Range);
671 bool funcRetrievesDefaultQueue(Attributor &
A,
unsigned COV) {
673 AA::RangeTy
Range(Pos, 8);
674 return funcRetrievesImplicitKernelArg(
A,
Range);
677 bool funcRetrievesCompletionAction(Attributor &
A,
unsigned COV) {
679 AA::RangeTy
Range(Pos, 8);
680 return funcRetrievesImplicitKernelArg(
A,
Range);
683 bool funcRetrievesHeapPtr(Attributor &
A,
unsigned COV) {
687 return funcRetrievesImplicitKernelArg(
A,
Range);
690 bool funcRetrievesQueuePtr(Attributor &
A,
unsigned COV) {
694 return funcRetrievesImplicitKernelArg(
A,
Range);
697 bool funcRetrievesImplicitKernelArg(Attributor &
A, AA::RangeTy
Range) {
709 const auto *PointerInfoAA =
A.getAAFor<AAPointerInfo>(
711 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
714 return PointerInfoAA->forallInterferingAccesses(
715 Range, [](
const AAPointerInfo::Access &Acc,
bool IsExact) {
720 bool UsedAssumedInformation =
false;
721 return !
A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *
this,
722 UsedAssumedInformation);
725 bool funcRetrievesLDSKernelId(Attributor &
A) {
730 bool UsedAssumedInformation =
false;
731 return !
A.checkForAllCallLikeInstructions(DoesNotRetrieve, *
this,
732 UsedAssumedInformation);
737 bool needFlatScratchInit(Attributor &
A) {
738 assert(isAssumed(FLAT_SCRATCH_INIT));
747 bool UsedAssumedInformation =
false;
748 if (!
A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *
this,
749 {Instruction::AddrSpaceCast},
750 UsedAssumedInformation))
754 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
758 for (
const Use &U :
I.operands()) {
760 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(
C))
782 return Callee->getIntrinsicID() !=
783 Intrinsic::amdgcn_addrspacecast_nonnull;
786 UsedAssumedInformation =
false;
790 return !
A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *
this,
791 UsedAssumedInformation);
795AAAMDAttributes &AAAMDAttributes::createForPosition(
const IRPosition &IRP,
798 return *
new (
A.Allocator) AAAMDAttributesFunction(IRP,
A);
803struct AAAMDSizeRangeAttribute
804 :
public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
805 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
809 AAAMDSizeRangeAttribute(
const IRPosition &IRP, Attributor &
A,
811 :
Base(IRP, 32), AttrName(AttrName) {}
814 void trackStatistics()
const override {}
816 template <
class AttributeImpl>
ChangeStatus updateImplImpl(Attributor &
A) {
819 auto CheckCallSite = [&](AbstractCallSite CS) {
822 <<
"->" << getAssociatedFunction()->
getName() <<
'\n');
824 const auto *CallerInfo =
A.getAAFor<AttributeImpl>(
826 if (!CallerInfo || !CallerInfo->isValidState())
835 bool AllCallSitesKnown =
true;
836 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
839 return indicatePessimisticFixpoint();
847 emitAttributeIfNotDefaultAfterClamp(Attributor &
A,
848 std::pair<unsigned, unsigned>
Default) {
850 unsigned Lower = getAssumed().getLower().getZExtValue();
851 unsigned Upper = getAssumed().getUpper().getZExtValue();
861 return ChangeStatus::UNCHANGED;
864 LLVMContext &Ctx =
F->getContext();
865 SmallString<10> Buffer;
866 raw_svector_ostream OS(Buffer);
868 return A.manifestAttrs(getIRPosition(),
869 {Attribute::get(Ctx, AttrName, OS.str())},
873 const std::string getAsStr(Attributor *)
const override {
875 raw_string_ostream OS(Str);
877 OS << getAssumed().getLower() <<
',' << getAssumed().getUpper() - 1;
884struct AAAMDFlatWorkGroupSize :
public AAAMDSizeRangeAttribute {
885 AAAMDFlatWorkGroupSize(
const IRPosition &IRP, Attributor &
A)
886 : AAAMDSizeRangeAttribute(IRP,
A,
"amdgpu-flat-work-group-size") {}
890 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
892 bool HasAttr =
false;
893 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*
F);
894 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*
F);
896 if (
auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*
F)) {
900 if (*Attr != MaxRange) {
908 if (
Range == MaxRange)
912 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
913 IntegerRangeState IRS(CR);
917 indicateOptimisticFixpoint();
921 return updateImplImpl<AAAMDFlatWorkGroupSize>(
A);
925 static AAAMDFlatWorkGroupSize &createForPosition(
const IRPosition &IRP,
930 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
931 return emitAttributeIfNotDefaultAfterClamp(
932 A, InfoCache.getMaximumFlatWorkGroupRange(*
F));
936 StringRef
getName()
const override {
return "AAAMDFlatWorkGroupSize"; }
939 const char *getIdAddr()
const override {
return &
ID; }
943 static bool classof(
const AbstractAttribute *AA) {
948 static const char ID;
951const char AAAMDFlatWorkGroupSize::ID = 0;
953AAAMDFlatWorkGroupSize &
954AAAMDFlatWorkGroupSize::createForPosition(
const IRPosition &IRP,
957 return *
new (
A.Allocator) AAAMDFlatWorkGroupSize(IRP,
A);
959 "AAAMDFlatWorkGroupSize is only valid for function position");
962struct TupleDecIntegerRangeState :
public AbstractState {
963 DecIntegerState<uint32_t>
X,
Y, Z;
965 bool isValidState()
const override {
966 return X.isValidState() &&
Y.isValidState() &&
Z.isValidState();
969 bool isAtFixpoint()
const override {
970 return X.isAtFixpoint() &&
Y.isAtFixpoint() &&
Z.isAtFixpoint();
974 return X.indicateOptimisticFixpoint() |
Y.indicateOptimisticFixpoint() |
975 Z.indicateOptimisticFixpoint();
979 return X.indicatePessimisticFixpoint() |
Y.indicatePessimisticFixpoint() |
980 Z.indicatePessimisticFixpoint();
983 TupleDecIntegerRangeState
operator^=(
const TupleDecIntegerRangeState &
Other) {
994 TupleDecIntegerRangeState &getAssumed() {
return *
this; }
995 const TupleDecIntegerRangeState &getAssumed()
const {
return *
this; }
998using AAAMDMaxNumWorkgroupsState =
999 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1002struct AAAMDMaxNumWorkgroups
1003 :
public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1004 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1006 AAAMDMaxNumWorkgroups(
const IRPosition &IRP, Attributor &
A) :
Base(IRP) {}
1010 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
1012 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*
F);
1014 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1015 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1016 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1019 indicatePessimisticFixpoint();
1025 auto CheckCallSite = [&](AbstractCallSite CS) {
1028 <<
"->" << getAssociatedFunction()->
getName() <<
'\n');
1030 const auto *CallerInfo =
A.getAAFor<AAAMDMaxNumWorkgroups>(
1032 if (!CallerInfo || !CallerInfo->isValidState())
1040 bool AllCallSitesKnown =
true;
1041 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
1044 return indicatePessimisticFixpoint();
1050 static AAAMDMaxNumWorkgroups &createForPosition(
const IRPosition &IRP,
1055 LLVMContext &Ctx =
F->getContext();
1056 SmallString<32> Buffer;
1057 raw_svector_ostream OS(Buffer);
1058 OS <<
X.getAssumed() <<
',' <<
Y.getAssumed() <<
',' <<
Z.getAssumed();
1062 return A.manifestAttrs(
1064 {Attribute::get(Ctx,
"amdgpu-max-num-workgroups", OS.str())},
1068 StringRef
getName()
const override {
return "AAAMDMaxNumWorkgroups"; }
1070 const std::string getAsStr(Attributor *)
const override {
1071 std::string Buffer =
"AAAMDMaxNumWorkgroupsState[";
1072 raw_string_ostream OS(Buffer);
1073 OS <<
X.getAssumed() <<
',' <<
Y.getAssumed() <<
',' <<
Z.getAssumed()
1078 const char *getIdAddr()
const override {
return &
ID; }
1082 static bool classof(
const AbstractAttribute *AA) {
1086 void trackStatistics()
const override {}
1089 static const char ID;
1092const char AAAMDMaxNumWorkgroups::ID = 0;
1094AAAMDMaxNumWorkgroups &
1095AAAMDMaxNumWorkgroups::createForPosition(
const IRPosition &IRP, Attributor &
A) {
1097 return *
new (
A.Allocator) AAAMDMaxNumWorkgroups(IRP,
A);
1098 llvm_unreachable(
"AAAMDMaxNumWorkgroups is only valid for function position");
1102struct AAAMDWavesPerEU :
public AAAMDSizeRangeAttribute {
1103 AAAMDWavesPerEU(
const IRPosition &IRP, Attributor &
A)
1104 : AAAMDSizeRangeAttribute(IRP,
A,
"amdgpu-waves-per-eu") {}
1108 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
1111 if (
auto Attr = InfoCache.getWavesPerEUAttr(*
F)) {
1112 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1113 1U, InfoCache.getMaxWavesPerEU(*
F)};
1114 if (*Attr != MaxWavesPerEURange) {
1115 auto [Min,
Max] = *Attr;
1116 ConstantRange
Range(APInt(32, Min), APInt(32, Max + 1));
1117 IntegerRangeState RangeState(
Range);
1118 this->getState() = RangeState;
1119 indicateOptimisticFixpoint();
1125 indicatePessimisticFixpoint();
1131 auto CheckCallSite = [&](AbstractCallSite CS) {
1135 <<
"->" <<
Func->getName() <<
'\n');
1138 const auto *CallerAA =
A.getAAFor<AAAMDWavesPerEU>(
1140 if (!CallerAA || !CallerAA->isValidState())
1143 ConstantRange Assumed = getAssumed();
1145 CallerAA->getAssumed().getLower().getZExtValue());
1147 CallerAA->getAssumed().getUpper().getZExtValue());
1148 ConstantRange
Range(APInt(32, Min), APInt(32, Max));
1149 IntegerRangeState RangeState(
Range);
1150 getState() = RangeState;
1151 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1152 : ChangeStatus::CHANGED;
1157 bool AllCallSitesKnown =
true;
1158 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
true, AllCallSitesKnown))
1159 return indicatePessimisticFixpoint();
1165 static AAAMDWavesPerEU &createForPosition(
const IRPosition &IRP,
1170 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
1171 return emitAttributeIfNotDefaultAfterClamp(
1172 A, {1U, InfoCache.getMaxWavesPerEU(*
F)});
1176 StringRef
getName()
const override {
return "AAAMDWavesPerEU"; }
1179 const char *getIdAddr()
const override {
return &
ID; }
1183 static bool classof(
const AbstractAttribute *AA) {
1188 static const char ID;
1191const char AAAMDWavesPerEU::ID = 0;
1193AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(
const IRPosition &IRP,
1196 return *
new (
A.Allocator) AAAMDWavesPerEU(IRP,
A);
1201static unsigned inlineAsmGetNumRequiredAGPRs(
const InlineAsm *IA,
1202 const CallBase &
Call) {
1205 unsigned AGPRDefCount = 0;
1206 unsigned AGPRUseCount = 0;
1207 unsigned MaxPhysReg = 0;
1211 for (
const InlineAsm::ConstraintInfo &CI :
IA->ParseConstraints()) {
1217 Ty = STy->getElementType(ResNo);
1232 for (StringRef Code : CI.Codes) {
1233 unsigned RegCount = 0;
1234 if (
Code.starts_with(
"a")) {
1245 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1255 AGPRDefCount =
alignTo(AGPRDefCount, RegCount);
1257 AGPRDefCount += RegCount;
1258 if (CI.isEarlyClobber) {
1259 AGPRUseCount =
alignTo(AGPRUseCount, RegCount);
1260 AGPRUseCount += RegCount;
1263 AGPRUseCount =
alignTo(AGPRUseCount, RegCount);
1264 AGPRUseCount += RegCount;
1269 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1274 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1277struct AAAMDGPUMinAGPRAlloc
1278 :
public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1279 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1280 AAAMDGPUMinAGPRAlloc(
const IRPosition &IRP, Attributor &
A) :
Base(IRP) {}
1282 static AAAMDGPUMinAGPRAlloc &createForPosition(
const IRPosition &IRP,
1285 return *
new (
A.Allocator) AAAMDGPUMinAGPRAlloc(IRP,
A);
1287 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1292 auto [MinNumAGPR, MaxNumAGPR] =
1295 if (MinNumAGPR == 0)
1296 indicateOptimisticFixpoint();
1299 const std::string getAsStr(Attributor *
A)
const override {
1300 std::string Str =
"amdgpu-agpr-alloc=";
1301 raw_string_ostream OS(Str);
1306 void trackStatistics()
const override {}
1309 DecIntegerState<> Maximum;
1316 const Value *CalleeOp = CB.getCalledOperand();
1321 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1325 switch (CB.getIntrinsicID()) {
1328 case Intrinsic::write_register:
1329 case Intrinsic::read_register:
1330 case Intrinsic::read_volatile_register: {
1335 auto [
Kind, RegIdx, NumRegs] =
1349 case Intrinsic::trap:
1350 case Intrinsic::debugtrap:
1351 case Intrinsic::ubsantrap:
1352 return CB.hasFnAttr(Attribute::NoCallback) ||
1353 !CB.hasFnAttr(
"trap-func-name");
1359 return CB.hasFnAttr(Attribute::NoCallback);
1363 auto *CBEdges =
A.getAAFor<AACallEdges>(
1365 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1370 for (
const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1371 const auto *CalleeInfo =
A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1373 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1384 bool UsedAssumedInformation =
false;
1385 if (!
A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *
this,
1386 UsedAssumedInformation))
1387 return indicatePessimisticFixpoint();
1393 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1394 SmallString<4> Buffer;
1395 raw_svector_ostream OS(Buffer);
1398 return A.manifestAttrs(
1399 getIRPosition(), {Attribute::get(Ctx,
"amdgpu-agpr-alloc", OS.str())});
1402 StringRef
getName()
const override {
return "AAAMDGPUMinAGPRAlloc"; }
1403 const char *getIdAddr()
const override {
return &
ID; }
1407 static bool classof(
const AbstractAttribute *AA) {
1411 static const char ID;
1414const char AAAMDGPUMinAGPRAlloc::ID = 0;
1418struct AAAMDGPUClusterDims
1419 :
public StateWrapper<BooleanState, AbstractAttribute> {
1420 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1421 AAAMDGPUClusterDims(
const IRPosition &IRP, Attributor &
A) :
Base(IRP) {}
1424 static AAAMDGPUClusterDims &createForPosition(
const IRPosition &IRP,
1428 StringRef
getName()
const override {
return "AAAMDGPUClusterDims"; }
1431 const char *getIdAddr()
const override {
return &
ID; }
1435 static bool classof(
const AbstractAttribute *AA) {
1439 virtual const AMDGPU::ClusterDimsAttr &getClusterDims()
const = 0;
1442 static const char ID;
1445const char AAAMDGPUClusterDims::ID = 0;
1447struct AAAMDGPUClusterDimsFunction :
public AAAMDGPUClusterDims {
1448 AAAMDGPUClusterDimsFunction(
const IRPosition &IRP, Attributor &
A)
1449 : AAAMDGPUClusterDims(IRP,
A) {}
1453 assert(
F &&
"empty associated function");
1460 indicatePessimisticFixpoint();
1462 indicateOptimisticFixpoint();
1466 const std::string getAsStr(Attributor *
A)
const override {
1476 void trackStatistics()
const override {}
1479 auto OldState = Attr;
1481 auto CheckCallSite = [&](AbstractCallSite CS) {
1482 const auto *CallerAA =
A.getAAFor<AAAMDGPUClusterDims>(
1484 DepClassTy::REQUIRED);
1485 if (!CallerAA || !CallerAA->isValidState())
1488 return merge(CallerAA->getClusterDims());
1491 bool UsedAssumedInformation =
false;
1492 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
1494 UsedAssumedInformation))
1495 return indicatePessimisticFixpoint();
1497 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1502 return ChangeStatus::UNCHANGED;
1503 return A.manifestAttrs(
1505 {Attribute::get(getAssociatedFunction()->
getContext(), AttrName,
1510 const AMDGPU::ClusterDimsAttr &getClusterDims()
const override {
1515 bool merge(
const AMDGPU::ClusterDimsAttr &
Other) {
1530 if (
Other.isUnknown())
1555 AMDGPU::ClusterDimsAttr Attr;
1557 static constexpr char AttrName[] =
"amdgpu-cluster-dims";
1560AAAMDGPUClusterDims &
1561AAAMDGPUClusterDims::createForPosition(
const IRPosition &IRP, Attributor &
A) {
1563 return *
new (
A.Allocator) AAAMDGPUClusterDimsFunction(IRP,
A);
1564 llvm_unreachable(
"AAAMDGPUClusterDims is only valid for function position");
1567static bool runImpl(SetVector<Function *> &Functions,
bool IsModulePass,
1568 bool DeleteFns,
Module &M, AnalysisGetter &AG,
1569 TargetMachine &TM, AMDGPUAttributorOptions
Options,
1572 CallGraphUpdater CGUpdater;
1574 AMDGPUInformationCache InfoCache(M, AG,
Allocator,
nullptr, TM);
1575 DenseSet<const char *>
Allowed(
1576 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1578 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1584 AttributorConfig AC(CGUpdater);
1585 AC.IsClosedWorldModule =
Options.IsClosedWorld;
1587 AC.IsModulePass = IsModulePass;
1588 AC.DeleteFns = DeleteFns;
1589 AC.DefaultInitializeLiveInternals =
false;
1590 AC.IndirectCalleeSpecializationCallback =
1591 [](Attributor &
A,
const AbstractAttribute &AA, CallBase &CB,
1596 AC.IPOAmendableCB = [](
const Function &
F) {
1597 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1600 Attributor
A(Functions, InfoCache, AC);
1603 StringRef LTOPhaseStr =
to_string(LTOPhase);
1604 dbgs() <<
"[AMDGPUAttributor] Running at phase " << LTOPhaseStr <<
'\n'
1605 <<
"[AMDGPUAttributor] Module " <<
M.getName() <<
" is "
1606 << (AC.IsClosedWorldModule ?
"" :
"not ")
1607 <<
"assumed to be a closed world.\n";
1610 for (
auto *
F : Functions) {
1614 CallingConv::ID CC =
F->getCallingConv();
1621 if (!
F->isDeclaration() &&
ST.hasClusters())
1624 if (
ST.hasGFX90AInsts())
1628 Value *Ptr =
nullptr;
1630 Ptr = LI->getPointerOperand();
1632 Ptr =
SI->getPointerOperand();
1634 Ptr = RMW->getPointerOperand();
1636 Ptr = CmpX->getPointerOperand();
1642 if (
II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1649 return A.run() == ChangeStatus::CHANGED;
1662 if (!
F.isIntrinsic())
1663 Functions.insert(&
F);
1667 return runImpl(Functions,
true,
true, M, AG,
1668 TM, Options, LTOPhase)
1685 if (!
F->isIntrinsic())
1686 Functions.insert(
F);
1690 Module *M =
C.begin()->getFunction().getParent();
1693 return runImpl(Functions,
false,
false, *M, AG,
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool hasSanitizerAttributes(const Function &F)
Returns true if sanitizer attributes are present on a function.
ImplicitArgumentPositions
static bool castRequiresQueuePtr(unsigned SrcAS)
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
AMD GCN specific subclass of TargetSubtarget.
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
Machine Check Debug Module
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ClusterDimsAttr get(const Function &F)
std::string to_string() const
bool isVariableDims() const
uint64_t getZExtValue() const
Get zero extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
A proxy from a FunctionAnalysisManager to an SCC.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this function.
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A node in the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A vector that has set insertion semantics.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
std::string str() const
str - Get the contents as an std::string.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be dropped rather than preventing a transformation from happening.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
NodeAddr< FuncNode * > Func
Context & getContext() const
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
@ None
No LTO/ThinLTO behavior needed.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
@ IRP_FUNCTION
An attribute for a function (scope).
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Kind getPositionKind() const
Return the associated position kind.
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is invalid.
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.