25#include "llvm/IR/IntrinsicsAMDGPU.h"
30#define DEBUG_TYPE "amdgpu-lower-kernel-attributes"
37enum DispatchPackedOffsets {
48enum ImplicitArgOffsets {
49 HIDDEN_BLOCK_COUNT_X = 0,
50 HIDDEN_BLOCK_COUNT_Y = 4,
51 HIDDEN_BLOCK_COUNT_Z = 8,
53 HIDDEN_GROUP_SIZE_X = 12,
54 HIDDEN_GROUP_SIZE_Y = 14,
55 HIDDEN_GROUP_SIZE_Z = 16,
57 HIDDEN_REMAINDER_X = 18,
58 HIDDEN_REMAINDER_Y = 20,
59 HIDDEN_REMAINDER_Z = 22,
62class AMDGPULowerKernelAttributes :
public ModulePass {
68 bool runOnModule(
Module &M)
override;
// Returns the fixed display name of this pass, "AMDGPU Kernel Attributes".
70 StringRef getPassName()
const override {
return "AMDGPU Kernel Attributes"; }
78 auto IntrinsicId = IsV5OrAbove ? Intrinsic::amdgcn_implicitarg_ptr
79 : Intrinsic::amdgcn_dispatch_ptr;
87 if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits<uint32_t>::max())
90 if (!Load->getType()->isIntegerTy(32))
94 if (Load->hasMetadata(LLVMContext::MD_range))
99 Load->setMetadata(LLVMContext::MD_range,
Range);
104 if (!Load->getType()->isIntegerTy(16))
108 if (Load->hasMetadata(LLVMContext::MD_range))
113 APInt(16, !IsRemainder),
115 Load->setMetadata(LLVMContext::MD_range,
Range);
122 auto *MD =
F->getMetadata(
"reqd_work_group_size");
123 const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
125 const bool HasUniformWorkGroupSize =
126 F->hasFnAttribute(
"uniform-work-group-size");
132 Value *BlockCounts[3] = {
nullptr,
nullptr,
nullptr};
133 Value *GroupSizes[3] = {
nullptr,
nullptr,
nullptr};
134 Value *Remainders[3] = {
nullptr,
nullptr,
nullptr};
135 Value *GridSizes[3] = {
nullptr,
nullptr,
nullptr};
138 bool MadeChange =
false;
157 if (!BCI->hasOneUse())
162 if (!Load || !Load->isSimple())
165 unsigned LoadSize =
DL.getTypeStoreSize(Load->getType());
170 case HIDDEN_BLOCK_COUNT_X:
172 BlockCounts[0] = Load;
177 case HIDDEN_BLOCK_COUNT_Y:
179 BlockCounts[1] = Load;
184 case HIDDEN_BLOCK_COUNT_Z:
186 BlockCounts[2] = Load;
191 case HIDDEN_GROUP_SIZE_X:
193 GroupSizes[0] = Load;
197 case HIDDEN_GROUP_SIZE_Y:
199 GroupSizes[1] = Load;
203 case HIDDEN_GROUP_SIZE_Z:
205 GroupSizes[2] = Load;
209 case HIDDEN_REMAINDER_X:
211 Remainders[0] = Load;
215 case HIDDEN_REMAINDER_Y:
217 Remainders[1] = Load;
221 case HIDDEN_REMAINDER_Z:
223 Remainders[2] = Load;
232 case WORKGROUP_SIZE_X:
234 GroupSizes[0] = Load;
236 case WORKGROUP_SIZE_Y:
238 GroupSizes[1] = Load;
240 case WORKGROUP_SIZE_Z:
242 GroupSizes[2] = Load;
262 if (IsV5OrAbove && HasUniformWorkGroupSize) {
272 for (
int I = 0;
I < 3; ++
I) {
273 Value *BlockCount = BlockCounts[
I];
293 for (
Value *Remainder : Remainders) {
296 Remainder->replaceAllUsesWith(
300 }
else if (HasUniformWorkGroupSize) {
320 for (
int I = 0;
I < 3; ++
I) {
321 Value *GroupSize = GroupSizes[
I];
322 Value *GridSize = GridSizes[
I];
323 if (!GroupSize || !GridSize)
337 for (
User *
UMin : ZextGroupSize->users()) {
342 if (HasReqdWorkGroupSize) {
346 KnownSize,
UMin->getType(),
false,
DL));
348 UMin->replaceAllUsesWith(ZextGroupSize);
362 for (
int I = 0;
I < 3;
I++) {
363 Value *GroupSize = GroupSizes[
I];
383 Value *
GEP = Builder.CreateInBoundsGEP(
384 Builder.getInt8Ty(), CI,
385 {ConstantInt::get(Type::getInt64Ty(CI->getContext()),
386 HIDDEN_BLOCK_COUNT_X + I * sizeof(uint32_t))});
387 Instruction *BlockCount = Builder.CreateLoad(Builder.getInt32Ty(),
GEP);
388 BlockCount->
setMetadata(LLVMContext::MD_invariant_load,
393 Value *BlockCountExt = Builder.CreateZExt(BlockCount, Inst->
getType());
402 if (!HasReqdWorkGroupSize)
405 for (
int I = 0;
I < 3;
I++) {
406 Value *GroupSize = GroupSizes[
I];
421bool AMDGPULowerKernelAttributes::runOnModule(
Module &M) {
422 bool MadeChange =
false;
430 SmallPtrSet<Instruction *, 4> HandledUses;
431 for (
auto *U :
BasePtr->users()) {
433 if (HandledUses.
insert(CI).second) {
443 "AMDGPU Kernel Attributes",
false,
false)
447char AMDGPULowerKernelAttributes::
ID = 0;
450 return new AMDGPULowerKernelAttributes();
457 Function *BasePtr = getBasePtrIntrinsic(*
F.getParent(), IsV5OrAbove);
static bool annotateGridSizeLoadWithRangeMD(LoadInst *Load, uint32_t MaxNumGroups)
static bool annotateGroupSizeLoadWithRangeMD(LoadInst *Load, bool IsRemainder)
static bool processUse(CallInst *CI, bool IsV5OrAbove)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Class for arbitrary precision integers.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
Represents analyses that only rely on functions' control flow.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature, or the call target is an alias.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
constexpr unsigned getMaxFlatWorkGroupSize()
unsigned getAMDHSACodeObjectVersion(const Module &M)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_GEP(const OperandTypes &...Ops)
Matches GetElementPtrInst.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)