LLVM 23.0.0git
AMDGPUWaitcntUtils.h
Go to the documentation of this file.
1//===- AMDGPUWaitcntUtils.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
11
12#include "llvm/ADT/Sequence.h"
14#include "llvm/Support/Debug.h"
17
18namespace llvm {
19
20namespace AMDGPU {
21
23 LOAD_CNT = 0, // VMcnt prior to gfx12.
24 DS_CNT, // LGKMcnt prior to gfx12.
26 STORE_CNT, // VScnt in gfx10/gfx11.
29 BVH_CNT, // gfx12+ only.
30 KM_CNT, // gfx12+ only.
31 X_CNT, // gfx1250.
32 ASYNC_CNT, // gfx1250.
33 TENSOR_CNT, // gfx1250.
35 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
36 VM_VSRC, // gfx12+ expert mode only.
39};
40
42
43// Return an iterable range over all counters between LOAD_CNT (the first
44// counter) and \c MaxCounter (exclusive; the default value yields an
45// enumeration over all counters).
48
49} // namespace AMDGPU
50
// Marks AMDGPU::InstCounterType as an iterable enum for the sequence
// utilities in llvm/ADT/Sequence.h.
// NOTE(review): presumably required so that inst_counter_types() can return
// an iota_range<InstCounterType> -- confirm against Sequence.h.
51template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
52 static constexpr bool is_iterable = true;
53};
54
55namespace AMDGPU {
56
57/// Represents the counter values to wait for in an s_waitcnt instruction.
58///
59/// Large values (including the maximum possible integer) can be used to
60/// represent "don't care" waits.
61class Waitcnt {
 // One wait value per counter, indexed by InstCounterType. The value ~0u
 // marks a counter with no wait requested (see the default constructor and
 // hasWait()).
62 std::array<unsigned, NUM_INST_CNTS> Cnt;
63
64public:
 /// \returns the wait value currently stored for counter \p T.
65 unsigned get(InstCounterType T) const { return Cnt[T]; }
 /// Overwrites the wait value stored for counter \p T with \p Val.
66 void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
67
 // Default constructor: every counter is set to ~0u, so hasWait() is false.
68 Waitcnt() { fill(Cnt, ~0u); }
69 // Pre-gfx12 constructor.
 // Sets only LOAD_CNT, EXP_CNT, DS_CNT and STORE_CNT; every other counter
 // keeps the ~0u value written by the delegated default constructor.
70 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
71 : Waitcnt() {
72 Cnt[LOAD_CNT] = VmCnt;
73 Cnt[EXP_CNT] = ExpCnt;
74 Cnt[DS_CNT] = LgkmCnt;
75 Cnt[STORE_CNT] = VsCnt;
76 }
77
78 // gfx12+ constructor.
 // Assigns each of the twelve counters named by its parameters. Note that
 // the parameter order (ExpCnt before DsCnt) differs from the order of the
 // assignments below; each parameter is still stored in its own counter.
79 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
80 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
81 unsigned AsyncCnt, unsigned TensorCnt, unsigned VaVdst,
82 unsigned VmVsrc)
83 : Waitcnt() {
84 Cnt[LOAD_CNT] = LoadCnt;
85 Cnt[DS_CNT] = DsCnt;
86 Cnt[EXP_CNT] = ExpCnt;
87 Cnt[STORE_CNT] = StoreCnt;
88 Cnt[SAMPLE_CNT] = SampleCnt;
89 Cnt[BVH_CNT] = BvhCnt;
90 Cnt[KM_CNT] = KmCnt;
91 Cnt[X_CNT] = XCnt;
92 Cnt[ASYNC_CNT] = AsyncCnt;
93 Cnt[TENSOR_CNT] = TensorCnt;
94 Cnt[VA_VDST] = VaVdst;
95 Cnt[VM_VSRC] = VmVsrc;
96 }
97
 /// \returns true if at least one counter holds a value other than ~0u.
98 bool hasWait() const {
99 return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
100 }
101
 // NOTE(review): the signature and loop header of the function below are not
 // visible in this listing. From the visible body: STORE_CNT is skipped, true
 // is returned as soon as a visited counter differs from ~0u, and false is
 // returned otherwise.
104 if (T == STORE_CNT)
105 continue;
106 if (Cnt[T] != ~0u)
107 return true;
108 }
109 return false;
110 }
111
 /// \returns true if STORE_CNT holds a value other than ~0u.
112 bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
113
 /// \returns true if VA_VDST or VM_VSRC holds a value other than ~0u.
114 bool hasWaitDepctr() const {
115 return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
116 }
117
119 // Does the right thing provided self and Other are either both pre-gfx12
120 // or both gfx12+.
 // NOTE(review): the signature and loop header are not visible in this
 // listing. The visible body keeps, for each visited counter, the smaller of
 // this object's value and Other's value, and returns the resulting Waitcnt.
123 Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
124 return Wait;
125 }
126
 // Writes "<counter name>: <value>" entries to OS, separated by the
 // ListSeparator delimiter, then a newline. If no entry was written (the
 // separator is still unused), "none" is written instead.
 // NOTE(review): the loop header selecting which counters are printed is not
 // visible in this listing.
127 void print(raw_ostream &OS) const {
128 ListSeparator LS;
130 OS << LS << getInstCounterName(T) << ": " << Cnt[T];
131 if (LS.unused())
132 OS << "none";
133 OS << '\n';
134 }
135
 // dump() is only declared when NDEBUG is not defined or LLVM_ENABLE_DUMP is
 // defined; it is declared here and defined out of line.
136#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
137 LLVM_DUMP_METHOD void dump() const;
138#endif
139
 // NOTE(review): the signature of the stream operator below is not visible in
 // this listing. The visible body forwards to Wait.print(OS) and returns OS
 // so that insertions can be chained.
141 Wait.print(OS);
142 return OS;
143 }
144};
145
/// \returns Decoded Waitcnt structure from given \p Encoded for given isa
/// \p Version.
/// NOTE(review): \p Encoded is presumably the s_waitcnt immediate -- confirm
/// against the definition.
146Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
147
/// \returns The counter values of \p Decoded encoded as a single unsigned
/// value for given isa \p Version.
/// NOTE(review): presumably the inverse of decodeWaitcnt(); which counters
/// are included in the encoding is not visible here -- confirm against the
/// definition.
148unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
149
150// The following are only meaningful on targets that support
151// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
152
153/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
154/// isa \p Version.
155Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
156
157/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
158/// isa \p Version.
159Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
160
161/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
162/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
163/// \p Version.
164unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
165
166/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
167/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
168/// \p Version.
169unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
170
171} // namespace AMDGPU
172
173} // namespace llvm
174
175#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define T
Provides some synthesis utilities to produce sequences of values.
This file contains some functions that are useful when dealing with strings.
Represents the counter values to wait for in an s_waitcnt instruction.
void print(raw_ostream &OS) const
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned AsyncCnt, unsigned TensorCnt, unsigned VaVdst, unsigned VmVsrc)
LLVM_DUMP_METHOD void dump() const
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void set(InstCounterType T, unsigned Val)
A helper class to return the specified delimiter string after the first invocation of operator String...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
StringLiteral getInstCounterName(InstCounterType T)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
This is an optimization pass for GlobalISel generic memory operations.
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
@ Wait
Definition Threading.h:60
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334