30 : Kind(Kind), Ctx(Ctx) {
31 assert(
Args.size() >= 1 &&
"Needs a minimum of one expression.");
32 assert(Kind != AGVK_None &&
"Cannot construct AMDGPUMCExpr of kind none.");
39 RawArgs =
static_cast<const MCExpr **
>(
40 Ctx.allocate(
sizeof(
const MCExpr *) *
Args.size()));
45AMDGPUMCExpr::~AMDGPUMCExpr() { Ctx.deallocate(RawArgs); }
50 return new (Ctx) AMDGPUMCExpr(Kind, Args, Ctx);
54 assert(Index < Args.size() &&
"Indexing out of bounds AMDGPUMCExpr sub-expr");
75 OS <<
"totalnumvgprs(";
84 OS <<
"instprefsize(";
93 for (
const auto *It = Args.begin(); It != Args.end(); ++It) {
95 if ((It + 1) != Args.end())
106 return std::max(Arg1, Arg2);
110 return std::min(Arg1, Arg2);
116 std::initializer_list<std::reference_wrapper<uint64_t>> Vals) {
118 auto [Expr, ValRef] = Pair;
121 if (!Expr->evaluateAsRelocatable(MCVal, Asm) || !MCVal.
isAbsolute())
128bool AMDGPUMCExpr::evaluateExtraSGPRs(
MCValue &Res,
130 const MCSubtargetInfo &STI = *Ctx.getSubtargetInfo();
131 uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
137 STI, (
bool)VCCUsed, (
bool)FlatScrUsed, (
bool)XNACKUsed);
142bool AMDGPUMCExpr::evaluateTotalNumVGPR(
MCValue &Res,
144 const MCSubtargetInfo &STI = *Ctx.getSubtargetInfo();
145 uint64_t NumAGPR = 0, NumVGPR = 0;
152 uint64_t TotalNum = Has90AInsts && NumAGPR ?
alignTo(NumVGPR, 4) + NumAGPR
153 : std::max(NumVGPR, NumAGPR);
167bool AMDGPUMCExpr::evaluateOccupancy(
MCValue &Res,
169 uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
173 Args.slice(0, 5), Asm,
174 {MaxWaves, Granule, TargetTotalNumVGPRs, Generation, InitOccupancy});
176 assert(
Success &&
"Arguments 1 to 5 for Occupancy should be known constants");
181 unsigned Occupancy = InitOccupancy;
183 Occupancy = std::min(
188 Occupancy = std::min(Occupancy,
190 NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
199 return amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
200 return amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
203bool AMDGPUMCExpr::evaluateInstPrefSize(
MCValue &Res,
205 uint64_t CodeSizeInBytes = 0;
208 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
212 uint64_t MaxVal = (1u << FieldWidth) - 1;
219 switch (E->getKind()) {
238 auto *TE =
static_cast<const AMDGPUMCExpr *
>(E);
239 for (
const MCExpr *E : TE->getArgs())
250 std::optional<int64_t>
Total;
255 return evaluateExtraSGPRs(Res, Asm);
257 return evaluateAlignTo(Res, Asm);
259 return evaluateTotalNumVGPR(Res, Asm);
261 return evaluateOccupancy(Res, Asm);
263 return evaluateInstPrefSize(Res, Asm);
266 return Args[0]->evaluateAsRelocatable(Res, Asm);
269 for (
const MCExpr *Arg : Args) {
271 if (!Arg->evaluateAsRelocatable(ArgRes, Asm) || !ArgRes.
isAbsolute())
274 if (!
Total.has_value())
284 for (
const MCExpr *Arg : Args)
289 for (
const MCExpr *Arg : Args) {
290 if (Arg->findAssociatedFragment())
291 return Arg->findAssociatedFragment();
301 const MCExpr *FlatScrUsed,
330 static constexpr unsigned BitWidth = 64;
348 static constexpr unsigned BitWidth = 64;
366 KBM[Expr] = LHSKnown & RHSKnown;
372 std::optional<bool> CompareRes =
KnownBits::eq(LHSKnown, RHSKnown);
377 std::optional<bool> CompareRes =
KnownBits::ne(LHSKnown, RHSKnown);
382 std::optional<bool> CompareRes =
KnownBits::sgt(LHSKnown, RHSKnown);
387 std::optional<bool> CompareRes =
KnownBits::sge(LHSKnown, RHSKnown);
392 std::optional<bool> CompareRes;
394 std::optional<bool> LHSBool =
396 std::optional<bool> RHSBool =
398 if (LHSBool && RHSBool)
399 CompareRes = *LHSBool && *RHSBool;
406 std::optional<bool> CompareRes =
412 std::optional<bool> CompareRes =
KnownBits::slt(LHSKnown, RHSKnown);
417 std::optional<bool> CompareRes =
KnownBits::sle(LHSKnown, RHSKnown);
428 KBM[Expr] = LHSKnown | RHSKnown;
443 KBM[Expr] = LHSKnown ^ RHSKnown;
450 static constexpr unsigned BitWidth = 64;
461 KBM[Expr] = std::move(KB);
472 KBM[Expr] = std::move(KB);
480 static constexpr unsigned BitWidth = 64;
494 KBM[Expr] = std::move(KB);
504 KBM[Expr] = std::move(KB);
514 KBM[Expr] = std::move(KB);
548 static constexpr unsigned BitWidth = 64;
551 if (Expr->evaluateAsAbsolute(Val)) {
606 if (!KBM.
count(Expr))
609 auto ValueCheckKnownBits = [](
KnownBits &KB,
unsigned Value) ->
bool {
624 APInt ConstVal = KBM[Expr].getConstant();
629 if (Expr->evaluateAsAbsolute(EvalValue))
645 if (ValueCheckKnownBits(KBM[
RHS], 0))
651 if (ValueCheckKnownBits(KBM[
LHS], 0))
653 if (ValueCheckKnownBits(KBM[
RHS], 0))
658 if (ValueCheckKnownBits(KBM[
LHS], 1))
660 if (ValueCheckKnownBits(KBM[
RHS], 1))
667 if (ValueCheckKnownBits(KBM[
RHS], 0))
669 if (ValueCheckKnownBits(KBM[
LHS], 0))
674 if (ValueCheckKnownBits(KBM[
LHS], 0) || ValueCheckKnownBits(KBM[
RHS], 0))
681 if (NewLHS !=
LHS || NewRHS !=
RHS)
690 if (SubExpr != NewSubExpr)
722 if (Expr->evaluateAsAbsolute(Val)) {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
static void targetOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, unsigned Depth)
static void unaryOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, unsigned Depth)
static KnownBits fromOptionalToKnownBits(std::optional< bool > CompareResult)
static void binaryOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, unsigned Depth)
static const MCExpr * tryFoldHelper(const MCExpr *Expr, KnownBitsMap &KBM, MCContext &Ctx)
static void knownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, unsigned Depth=0)
static bool evaluateMCExprs(ArrayRef< const MCExpr * > Exprs, const MCAssembler *Asm, std::initializer_list< std::reference_wrapper< uint64_t > > Vals)
static unsigned getInstPrefSizeFieldWidth(const MCSubtargetInfo &STI)
Get the inst_pref_size field width for the given subtarget.
DenseMap< const MCExpr *, KnownBits > KnownBitsMap
AMDHSA kernel descriptor definitions.
AMDGPU target specific MCExpr operations.
ArrayRef< const MCExpr * > getArgs() const
MCFragment * findAssociatedFragment() const override
static const AMDGPUMCExpr * createInstPrefSize(const MCExpr *CodeSizeBytes, MCContext &Ctx)
Create an expression for instruction prefetch size computation: min(divideCeil(CodeSizeBytes,...
void visitUsedExpr(MCStreamer &Streamer) const override
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm) const override
MCContext & getCtx() const
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
const MCExpr * getSubExpr(size_t Index) const
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override
VariantKind getKind() const
static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *E)
Class for arbitrary precision integers.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
This class is intended to be used as a base class for asm properties and features specific to the tar...
void printExpr(raw_ostream &, const MCExpr &) const
Binary assembler expressions.
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Opcode getOpcode() const
Get the kind of this binary expression.
static LLVM_ABI const MCBinaryExpr * create(Opcode Op, const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
@ AShr
Arithmetic shift right.
@ LShr
Logical shift right.
@ GTE
Signed greater than or equal comparison (result is either 0 or some target-specific non-zero value).
@ GT
Signed greater than comparison (result is either 0 or some target-specific non-zero value)
@ Xor
Bitwise exclusive or.
@ LT
Signed less than comparison (result is either 0 or some target-specific non-zero value).
@ LTE
Signed less than or equal comparison (result is either 0 or some target-specific non-zero value).
@ NE
Inequality comparison.
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
@ Unary
Unary expressions.
@ Constant
Constant expressions.
@ SymbolRef
References to labels and assigned expressions.
@ Target
Target specific expression.
@ Specifier
Expression with a relocation specifier.
@ Binary
Binary expressions.
LLVM_ABI bool evaluateAsAbsolute(int64_t &Res) const
Try to evaluate the expression to an absolute value.
Streaming machine code generation interface.
void visitUsedExpr(const MCExpr &Expr)
Generic base class for all target subtargets.
Represent a reference to a symbol from inside an expression.
const MCSymbol & getSymbol() const
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Unary assembler expressions.
Opcode getOpcode() const
Get the kind of this unary expression.
static LLVM_ABI const MCUnaryExpr * create(Opcode Op, const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
const MCExpr * getSubExpr() const
Get the child of this unary expression.
static MCValue get(const MCSymbol *SymA, const MCSymbol *SymB=nullptr, int64_t Val=0, uint32_t Specifier=0)
int64_t getConstant() const
bool isAbsolute() const
Is this an absolute (as opposed to relocatable) value.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getInstCacheLineSize(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY AMDGPUMCExpr::VariantKind getExprKind(const MCExpr *Expr)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto uninitialized_copy(R &&Src, IterTy Dst)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static LLVM_ABI std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
void makeNonNegative()
Make this value non-negative.
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
void makeNegative()
Make this value negative.
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
bool isConstant() const
Returns true if we know the value of all bits.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI KnownBits srem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for srem(LHS, RHS).
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits sdiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for sdiv(LHS, RHS).
static KnownBits sub(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from subtraction of LHS and RHS.
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
const APInt & getConstant() const
Returns the value when all bits have a known value.