LLVM 23.0.0git
AMDGPUAttributor.cpp
Go to the documentation of this file.
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "AMDGPUTargetMachine.h"
15#include "GCNSubtarget.h"
17#include "llvm/IR/IntrinsicsAMDGPU.h"
18#include "llvm/IR/IntrinsicsR600.h"
21
22#define DEBUG_TYPE "amdgpu-attributor"
23
24using namespace llvm;
25
27 "amdgpu-indirect-call-specialization-threshold",
29 "A threshold controls whether an indirect call will be specialized"),
30 cl::init(3));
31
32#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
33
35#include "AMDGPUAttributes.def"
37};
38
39#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
40
43#include "AMDGPUAttributes.def"
46};
47
48#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
49static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
51#include "AMDGPUAttributes.def"
52};
53
54// We do not need to note the x workitem or workgroup id because they are always
55// initialized.
56//
57// TODO: We should not add the attributes if the known compile time workgroup
58// size is 1 for y/z.
60intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
61 bool HasApertureRegs, bool SupportsGetDoorBellID,
62 unsigned CodeObjectVersion) {
63 switch (ID) {
64 case Intrinsic::amdgcn_workitem_id_x:
65 NonKernelOnly = true;
66 return WORKITEM_ID_X;
67 case Intrinsic::amdgcn_workgroup_id_x:
68 NonKernelOnly = true;
69 return WORKGROUP_ID_X;
70 case Intrinsic::amdgcn_workitem_id_y:
71 case Intrinsic::r600_read_tidig_y:
72 return WORKITEM_ID_Y;
73 case Intrinsic::amdgcn_workitem_id_z:
74 case Intrinsic::r600_read_tidig_z:
75 return WORKITEM_ID_Z;
76 case Intrinsic::amdgcn_workgroup_id_y:
77 case Intrinsic::r600_read_tgid_y:
78 return WORKGROUP_ID_Y;
79 case Intrinsic::amdgcn_workgroup_id_z:
80 case Intrinsic::r600_read_tgid_z:
81 return WORKGROUP_ID_Z;
82 case Intrinsic::amdgcn_cluster_id_x:
83 NonKernelOnly = true;
84 return CLUSTER_ID_X;
85 case Intrinsic::amdgcn_cluster_id_y:
86 return CLUSTER_ID_Y;
87 case Intrinsic::amdgcn_cluster_id_z:
88 return CLUSTER_ID_Z;
89 case Intrinsic::amdgcn_lds_kernel_id:
90 return LDS_KERNEL_ID;
91 case Intrinsic::amdgcn_dispatch_ptr:
92 return DISPATCH_PTR;
93 case Intrinsic::amdgcn_dispatch_id:
94 return DISPATCH_ID;
95 case Intrinsic::amdgcn_implicitarg_ptr:
96 return IMPLICIT_ARG_PTR;
97 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
98 // queue_ptr.
99 case Intrinsic::amdgcn_queue_ptr:
100 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
101 return QUEUE_PTR;
102 case Intrinsic::amdgcn_is_shared:
103 case Intrinsic::amdgcn_is_private:
104 if (HasApertureRegs)
105 return NOT_IMPLICIT_INPUT;
106 // Under V5, we need implicitarg_ptr + offsets to access private_base or
107 // shared_base. For pre-V5, however, need to access them through queue_ptr +
108 // offsets.
109 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
110 : QUEUE_PTR;
111 case Intrinsic::trap:
112 case Intrinsic::debugtrap:
113 case Intrinsic::ubsantrap:
114 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
115 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
116 : QUEUE_PTR;
117 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
118 return QUEUE_PTR;
119 default:
120 return UNKNOWN_INTRINSIC;
121 }
122}
123
124static bool castRequiresQueuePtr(unsigned SrcAS) {
125 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
126}
127
128static bool isDSAddress(const Constant *C) {
130 if (!GV)
131 return false;
132 unsigned AS = GV->getAddressSpace();
134}
135
136/// Returns true if sanitizer attributes are present on a function.
137static bool hasSanitizerAttributes(const Function &F) {
138 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
139 F.hasFnAttribute(Attribute::SanitizeThread) ||
140 F.hasFnAttribute(Attribute::SanitizeMemory) ||
141 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
142 F.hasFnAttribute(Attribute::SanitizeMemTag);
143}
144
145namespace {
146class AMDGPUInformationCache : public InformationCache {
147public:
148 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
150 SetVector<Function *> *CGSCC, TargetMachine &TM)
151 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
152 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
153
154 TargetMachine &TM;
155
156 enum ConstantStatus : uint8_t {
157 NONE = 0,
158 DS_GLOBAL = 1 << 0,
159 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
160 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
161 ADDR_SPACE_CAST_BOTH_TO_FLAT =
162 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
163 };
164
165 /// Check if the subtarget has aperture regs.
166 bool hasApertureRegs(Function &F) {
167 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
168 return ST.hasApertureRegs();
169 }
170
171 /// Check if the subtarget supports GetDoorbellID.
172 bool supportsGetDoorbellID(Function &F) {
173 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
174 return ST.supportsGetDoorbellID();
175 }
176
177 std::optional<std::pair<unsigned, unsigned>>
178 getFlatWorkGroupSizeAttr(const Function &F) const {
179 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
180 if (!R)
181 return std::nullopt;
182 return std::make_pair(R->first, *(R->second));
183 }
184
185 std::pair<unsigned, unsigned>
186 getDefaultFlatWorkGroupSize(const Function &F) const {
187 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
188 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
189 }
190
191 std::pair<unsigned, unsigned>
192 getMaximumFlatWorkGroupRange(const Function &F) {
193 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
194 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
195 }
196
197 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
198 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
199 return ST.getMaxNumWorkGroups(F);
200 }
201
202 /// Get code object version.
203 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
204
205 std::optional<std::pair<unsigned, unsigned>>
206 getWavesPerEUAttr(const Function &F) {
207 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
208 /*OnlyFirstRequired=*/true);
209 if (!Val)
210 return std::nullopt;
211 if (!Val->second) {
212 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
213 Val->second = ST.getMaxWavesPerEU();
214 }
215 return std::make_pair(Val->first, *(Val->second));
216 }
217
218 unsigned getMaxWavesPerEU(const Function &F) {
219 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
220 return ST.getMaxWavesPerEU();
221 }
222
223 unsigned getMaxAddrSpace() const override {
225 }
226
227private:
228 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
229 /// local to flat. These casts may require the queue pointer.
230 static uint8_t visitConstExpr(const ConstantExpr *CE) {
231 uint8_t Status = NONE;
232
233 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
234 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
235 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
236 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
237 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
238 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
239 }
240
241 return Status;
242 }
243
244 /// Get the constant access bitmap for \p C.
245 uint8_t getConstantAccess(const Constant *C,
246 SmallPtrSetImpl<const Constant *> &Visited) {
247 auto It = ConstantStatus.find(C);
248 if (It != ConstantStatus.end())
249 return It->second;
250
251 uint8_t Result = 0;
252 if (isDSAddress(C))
253 Result = DS_GLOBAL;
254
255 if (const auto *CE = dyn_cast<ConstantExpr>(C))
256 Result |= visitConstExpr(CE);
257
258 for (const Use &U : C->operands()) {
259 const auto *OpC = dyn_cast<Constant>(U);
260 if (!OpC || !Visited.insert(OpC).second)
261 continue;
262
263 Result |= getConstantAccess(OpC, Visited);
264 }
265 return Result;
266 }
267
268public:
269 /// Returns true if \p Fn needs the queue pointer because of \p C.
270 bool needsQueuePtr(const Constant *C, Function &Fn) {
271 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
272 bool HasAperture = hasApertureRegs(Fn);
273
274 // No need to explore the constants.
275 if (!IsNonEntryFunc && HasAperture)
276 return false;
277
278 SmallPtrSet<const Constant *, 8> Visited;
279 uint8_t Access = getConstantAccess(C, Visited);
280
281 // We need to trap on DS globals in non-entry functions.
282 if (IsNonEntryFunc && (Access & DS_GLOBAL))
283 return true;
284
285 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
286 }
287
288 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
289 SmallPtrSet<const Constant *, 8> Visited;
290 uint8_t Access = getConstantAccess(C, Visited);
291 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
292 }
293
294private:
295 /// Used to determine if the Constant needs the queue pointer.
296 DenseMap<const Constant *, uint8_t> ConstantStatus;
297 const unsigned CodeObjectVersion;
298};
299
300struct AAAMDAttributes
301 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
302 AbstractAttribute> {
303 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
304 AbstractAttribute>;
305
306 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
307
308 /// Create an abstract attribute view for the position \p IRP.
309 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
310 Attributor &A);
311
312 /// See AbstractAttribute::getName().
313 StringRef getName() const override { return "AAAMDAttributes"; }
314
315 /// See AbstractAttribute::getIdAddr().
316 const char *getIdAddr() const override { return &ID; }
317
318 /// This function should return true if the type of the \p AA is
319 /// AAAMDAttributes.
320 static bool classof(const AbstractAttribute *AA) {
321 return (AA->getIdAddr() == &ID);
322 }
323
324 /// Unique ID (due to the unique address)
325 static const char ID;
326};
327const char AAAMDAttributes::ID = 0;
328
329struct AAUniformWorkGroupSize
330 : public StateWrapper<BooleanState, AbstractAttribute> {
331 using Base = StateWrapper<BooleanState, AbstractAttribute>;
332 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
333
334 /// Create an abstract attribute view for the position \p IRP.
335 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
336 Attributor &A);
337
338 /// See AbstractAttribute::getName().
339 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
340
341 /// See AbstractAttribute::getIdAddr().
342 const char *getIdAddr() const override { return &ID; }
343
344 /// This function should return true if the type of the \p AA is
345 /// AAAMDAttributes.
346 static bool classof(const AbstractAttribute *AA) {
347 return (AA->getIdAddr() == &ID);
348 }
349
350 /// Unique ID (due to the unique address)
351 static const char ID;
352};
353const char AAUniformWorkGroupSize::ID = 0;
354
355struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
356 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
357 : AAUniformWorkGroupSize(IRP, A) {}
358
359 void initialize(Attributor &A) override {
360 Function *F = getAssociatedFunction();
361 CallingConv::ID CC = F->getCallingConv();
362
363 if (CC != CallingConv::AMDGPU_KERNEL)
364 return;
365
366 bool InitialValue = F->hasFnAttribute("uniform-work-group-size");
367
368 if (InitialValue)
369 indicateOptimisticFixpoint();
370 else
371 indicatePessimisticFixpoint();
372 }
373
374 ChangeStatus updateImpl(Attributor &A) override {
375 ChangeStatus Change = ChangeStatus::UNCHANGED;
376
377 auto CheckCallSite = [&](AbstractCallSite CS) {
378 Function *Caller = CS.getInstruction()->getFunction();
379 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
380 << "->" << getAssociatedFunction()->getName() << "\n");
381
382 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
383 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
384 if (!CallerInfo || !CallerInfo->isValidState())
385 return false;
386
387 Change = Change | clampStateAndIndicateChange(this->getState(),
388 CallerInfo->getState());
389
390 return true;
391 };
392
393 bool AllCallSitesKnown = true;
394 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
395 return indicatePessimisticFixpoint();
396
397 return Change;
398 }
399
400 ChangeStatus manifest(Attributor &A) override {
401 if (!getAssumed())
402 return ChangeStatus::UNCHANGED;
403
404 LLVMContext &Ctx = getAssociatedFunction()->getContext();
405 return A.manifestAttrs(getIRPosition(),
406 {Attribute::get(Ctx, "uniform-work-group-size")},
407 /*ForceReplace=*/true);
408 }
409
410 bool isValidState() const override {
411 // This state is always valid, even when the state is false.
412 return true;
413 }
414
415 const std::string getAsStr(Attributor *) const override {
416 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
417 }
418
419 /// See AbstractAttribute::trackStatistics()
420 void trackStatistics() const override {}
421};
422
423AAUniformWorkGroupSize &
424AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
425 Attributor &A) {
427 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
429 "AAUniformWorkGroupSize is only valid for function position");
430}
431
432struct AAAMDAttributesFunction : public AAAMDAttributes {
433 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
434 : AAAMDAttributes(IRP, A) {}
435
436 void initialize(Attributor &A) override {
437 Function *F = getAssociatedFunction();
438
439 // If the function requires the implicit arg pointer due to sanitizers,
440 // assume it's needed even if explicitly marked as not requiring it.
441 // Flat scratch initialization is needed because `asan_malloc_impl`
442 // calls introduced later in pipeline will have flat scratch accesses.
443 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
444 // implementation for `asan_malloc_impl` is updated.
445 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
446 if (HasSanitizerAttrs) {
447 removeAssumedBits(IMPLICIT_ARG_PTR);
448 removeAssumedBits(HOSTCALL_PTR);
449 removeAssumedBits(FLAT_SCRATCH_INIT);
450 }
451
452 for (auto Attr : ImplicitAttrs) {
453 if (HasSanitizerAttrs &&
454 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
455 Attr.first == FLAT_SCRATCH_INIT))
456 continue;
457
458 if (F->hasFnAttribute(Attr.second))
459 addKnownBits(Attr.first);
460 }
461
462 if (F->isDeclaration())
463 return;
464
465 // Ignore functions with graphics calling conventions, these are currently
466 // not allowed to have kernel arguments.
467 if (AMDGPU::isGraphics(F->getCallingConv())) {
468 indicatePessimisticFixpoint();
469 return;
470 }
471 }
472
473 ChangeStatus updateImpl(Attributor &A) override {
474 Function *F = getAssociatedFunction();
475 // The current assumed state used to determine a change.
476 auto OrigAssumed = getAssumed();
477
478 // Check for Intrinsics and propagate attributes.
479 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
480 *this, this->getIRPosition(), DepClassTy::REQUIRED);
481 if (!AAEdges || !AAEdges->isValidState() ||
482 AAEdges->hasNonAsmUnknownCallee())
483 return indicatePessimisticFixpoint();
484
485 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
486
487 bool NeedsImplicit = false;
488 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
489 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
490 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
491 unsigned COV = InfoCache.getCodeObjectVersion();
492
493 for (Function *Callee : AAEdges->getOptimisticEdges()) {
494 Intrinsic::ID IID = Callee->getIntrinsicID();
495 if (IID == Intrinsic::not_intrinsic) {
496 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
497 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
498 if (!AAAMD || !AAAMD->isValidState())
499 return indicatePessimisticFixpoint();
500 *this &= *AAAMD;
501 continue;
502 }
503
504 bool NonKernelOnly = false;
505 ImplicitArgumentMask AttrMask =
506 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
507 HasApertureRegs, SupportsGetDoorbellID, COV);
508
509 if (AttrMask == UNKNOWN_INTRINSIC) {
510 // Assume not-nocallback intrinsics may invoke a function which accesses
511 // implicit arguments.
512 //
513 // FIXME: This isn't really the correct check. We want to ensure it
514 // isn't calling any function that may use implicit arguments regardless
515 // of whether it's internal to the module or not.
516 //
517 // TODO: Ignoring callsite attributes.
518 if (!Callee->hasFnAttribute(Attribute::NoCallback))
519 return indicatePessimisticFixpoint();
520 continue;
521 }
522
523 if (AttrMask != NOT_IMPLICIT_INPUT) {
524 if ((IsNonEntryFunc || !NonKernelOnly))
525 removeAssumedBits(AttrMask);
526 }
527 }
528
529 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
530 if (NeedsImplicit)
531 removeAssumedBits(IMPLICIT_ARG_PTR);
532
533 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
534 // Under V5, we need implicitarg_ptr + offsets to access private_base or
535 // shared_base. We do not actually need queue_ptr.
536 if (COV >= 5)
537 removeAssumedBits(IMPLICIT_ARG_PTR);
538 else
539 removeAssumedBits(QUEUE_PTR);
540 }
541
542 if (funcRetrievesMultigridSyncArg(A, COV)) {
543 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
544 "multigrid_sync_arg needs implicitarg_ptr");
545 removeAssumedBits(MULTIGRID_SYNC_ARG);
546 }
547
548 if (funcRetrievesHostcallPtr(A, COV)) {
549 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
550 removeAssumedBits(HOSTCALL_PTR);
551 }
552
553 if (funcRetrievesHeapPtr(A, COV)) {
554 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
555 removeAssumedBits(HEAP_PTR);
556 }
557
558 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
559 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
560 removeAssumedBits(QUEUE_PTR);
561 }
562
563 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
564 removeAssumedBits(LDS_KERNEL_ID);
565 }
566
567 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
568 removeAssumedBits(DEFAULT_QUEUE);
569
570 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
571 removeAssumedBits(COMPLETION_ACTION);
572
573 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
574 removeAssumedBits(FLAT_SCRATCH_INIT);
575
576 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
577 : ChangeStatus::UNCHANGED;
578 }
579
580 ChangeStatus manifest(Attributor &A) override {
582 LLVMContext &Ctx = getAssociatedFunction()->getContext();
583
584 for (auto Attr : ImplicitAttrs) {
585 if (isKnown(Attr.first))
586 AttrList.push_back(Attribute::get(Ctx, Attr.second));
587 }
588
589 return A.manifestAttrs(getIRPosition(), AttrList,
590 /* ForceReplace */ true);
591 }
592
593 const std::string getAsStr(Attributor *) const override {
594 std::string Str;
595 raw_string_ostream OS(Str);
596 OS << "AMDInfo[";
597 for (auto Attr : ImplicitAttrs)
598 if (isAssumed(Attr.first))
599 OS << ' ' << Attr.second;
600 OS << " ]";
601 return OS.str();
602 }
603
604 /// See AbstractAttribute::trackStatistics()
605 void trackStatistics() const override {}
606
607private:
608 bool checkForQueuePtr(Attributor &A) {
609 Function *F = getAssociatedFunction();
610 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
611
612 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
613
614 bool NeedsQueuePtr = false;
615
616 auto CheckAddrSpaceCasts = [&](Instruction &I) {
617 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
618 if (castRequiresQueuePtr(SrcAS)) {
619 NeedsQueuePtr = true;
620 return false;
621 }
622 return true;
623 };
624
625 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
626
627 // `checkForAllInstructions` is much more cheaper than going through all
628 // instructions, try it first.
629
630 // The queue pointer is not needed if aperture regs is present.
631 if (!HasApertureRegs) {
632 bool UsedAssumedInformation = false;
633 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
634 {Instruction::AddrSpaceCast},
635 UsedAssumedInformation);
636 }
637
638 // If we found that we need the queue pointer, nothing else to do.
639 if (NeedsQueuePtr)
640 return true;
641
642 if (!IsNonEntryFunc && HasApertureRegs)
643 return false;
644
645 for (BasicBlock &BB : *F) {
646 for (Instruction &I : BB) {
647 for (const Use &U : I.operands()) {
648 if (const auto *C = dyn_cast<Constant>(U)) {
649 if (InfoCache.needsQueuePtr(C, *F))
650 return true;
651 }
652 }
653 }
654 }
655
656 return false;
657 }
658
659 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
661 AA::RangeTy Range(Pos, 8);
662 return funcRetrievesImplicitKernelArg(A, Range);
663 }
664
665 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
667 AA::RangeTy Range(Pos, 8);
668 return funcRetrievesImplicitKernelArg(A, Range);
669 }
670
671 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
673 AA::RangeTy Range(Pos, 8);
674 return funcRetrievesImplicitKernelArg(A, Range);
675 }
676
677 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
679 AA::RangeTy Range(Pos, 8);
680 return funcRetrievesImplicitKernelArg(A, Range);
681 }
682
683 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
684 if (COV < 5)
685 return false;
687 return funcRetrievesImplicitKernelArg(A, Range);
688 }
689
690 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
691 if (COV < 5)
692 return false;
694 return funcRetrievesImplicitKernelArg(A, Range);
695 }
696
697 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
698 // Check if this is a call to the implicitarg_ptr builtin and it
699 // is used to retrieve the hostcall pointer. The implicit arg for
700 // hostcall is not used only if every use of the implicitarg_ptr
701 // is a load that clearly does not retrieve any byte of the
702 // hostcall pointer. We check this by tracing all the uses of the
703 // initial call to the implicitarg_ptr intrinsic.
704 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
705 auto &Call = cast<CallBase>(I);
706 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
707 return true;
708
709 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
710 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
711 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
712 return false;
713
714 return PointerInfoAA->forallInterferingAccesses(
715 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
716 return Acc.getRemoteInst()->isDroppable();
717 });
718 };
719
720 bool UsedAssumedInformation = false;
721 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
722 UsedAssumedInformation);
723 }
724
725 bool funcRetrievesLDSKernelId(Attributor &A) {
726 auto DoesNotRetrieve = [&](Instruction &I) {
727 auto &Call = cast<CallBase>(I);
728 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
729 };
730 bool UsedAssumedInformation = false;
731 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
732 UsedAssumedInformation);
733 }
734
735 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
736 // not to be set.
737 bool needFlatScratchInit(Attributor &A) {
738 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
739
740 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
741 // there is a cast from PRIVATE_ADDRESS.
742 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
743 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
745 };
746
747 bool UsedAssumedInformation = false;
748 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
749 {Instruction::AddrSpaceCast},
750 UsedAssumedInformation))
751 return true;
752
753 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
754 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
755
756 Function *F = getAssociatedFunction();
757 for (Instruction &I : instructions(F)) {
758 for (const Use &U : I.operands()) {
759 if (const auto *C = dyn_cast<Constant>(U)) {
760 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
761 return true;
762 }
763 }
764 }
765
766 // Finally check callees.
767
768 // This is called on each callee; false means callee shouldn't have
769 // no-flat-scratch-init.
770 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
771 const auto &CB = cast<CallBase>(I);
772 const Function *Callee = CB.getCalledFunction();
773
774 // Callee == 0 for inline asm or indirect call with known callees.
775 // In the latter case, updateImpl() already checked the callees and we
776 // know their FLAT_SCRATCH_INIT bit is set.
777 // If function has indirect call with unknown callees, the bit is
778 // already removed in updateImpl() and execution won't reach here.
779 if (!Callee)
780 return true;
781
782 return Callee->getIntrinsicID() !=
783 Intrinsic::amdgcn_addrspacecast_nonnull;
784 };
785
786 UsedAssumedInformation = false;
787 // If any callee is false (i.e. need FlatScratchInit),
788 // checkForAllCallLikeInstructions returns false, in which case this
789 // function returns true.
790 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
791 UsedAssumedInformation);
792 }
793};
794
795AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
796 Attributor &A) {
798 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
799 llvm_unreachable("AAAMDAttributes is only valid for function position");
800}
801
802/// Base class to derive different size ranges.
803struct AAAMDSizeRangeAttribute
804 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
805 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
806
807 StringRef AttrName;
808
809 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
810 StringRef AttrName)
811 : Base(IRP, 32), AttrName(AttrName) {}
812
813 /// See AbstractAttribute::trackStatistics()
814 void trackStatistics() const override {}
815
816 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
817 ChangeStatus Change = ChangeStatus::UNCHANGED;
818
819 auto CheckCallSite = [&](AbstractCallSite CS) {
820 Function *Caller = CS.getInstruction()->getFunction();
821 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
822 << "->" << getAssociatedFunction()->getName() << '\n');
823
824 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
825 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
826 if (!CallerInfo || !CallerInfo->isValidState())
827 return false;
828
829 Change |=
830 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
831
832 return true;
833 };
834
835 bool AllCallSitesKnown = true;
836 if (!A.checkForAllCallSites(CheckCallSite, *this,
837 /*RequireAllCallSites=*/true,
838 AllCallSitesKnown))
839 return indicatePessimisticFixpoint();
840
841 return Change;
842 }
843
844 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
845 /// attribute if it is not same as default.
847 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
848 std::pair<unsigned, unsigned> Default) {
849 auto [Min, Max] = Default;
850 unsigned Lower = getAssumed().getLower().getZExtValue();
851 unsigned Upper = getAssumed().getUpper().getZExtValue();
852
853 // Clamp the range to the default value.
854 if (Lower < Min)
855 Lower = Min;
856 if (Upper > Max + 1)
857 Upper = Max + 1;
858
859 // No manifest if the value is invalid or same as default after clamp.
860 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
861 return ChangeStatus::UNCHANGED;
862
863 Function *F = getAssociatedFunction();
864 LLVMContext &Ctx = F->getContext();
865 SmallString<10> Buffer;
866 raw_svector_ostream OS(Buffer);
867 OS << Lower << ',' << Upper - 1;
868 return A.manifestAttrs(getIRPosition(),
869 {Attribute::get(Ctx, AttrName, OS.str())},
870 /*ForceReplace=*/true);
871 }
872
873 const std::string getAsStr(Attributor *) const override {
874 std::string Str;
875 raw_string_ostream OS(Str);
876 OS << getName() << '[';
877 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
878 OS << ']';
879 return OS.str();
880 }
881};
882
883/// Propagate amdgpu-flat-work-group-size attribute.
884struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
885 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
886 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
887
888 void initialize(Attributor &A) override {
889 Function *F = getAssociatedFunction();
890 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
891
892 bool HasAttr = false;
893 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
894 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
895
896 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
897 // We only consider an attribute that is not max range because the front
898 // end always emits the attribute, unfortunately, and sometimes it emits
899 // the max range.
900 if (*Attr != MaxRange) {
901 Range = *Attr;
902 HasAttr = true;
903 }
904 }
905
906 // We don't want to directly clamp the state if it's the max range because
907 // that is basically the worst state.
908 if (Range == MaxRange)
909 return;
910
911 auto [Min, Max] = Range;
912 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
913 IntegerRangeState IRS(CR);
914 clampStateAndIndicateChange(this->getState(), IRS);
915
916 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
917 indicateOptimisticFixpoint();
918 }
919
920 ChangeStatus updateImpl(Attributor &A) override {
921 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
922 }
923
924 /// Create an abstract attribute view for the position \p IRP.
925 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
926 Attributor &A);
927
928 ChangeStatus manifest(Attributor &A) override {
929 Function *F = getAssociatedFunction();
930 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
931 return emitAttributeIfNotDefaultAfterClamp(
932 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
933 }
934
935 /// See AbstractAttribute::getName()
936 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
937
938 /// See AbstractAttribute::getIdAddr()
939 const char *getIdAddr() const override { return &ID; }
940
941 /// This function should return true if the type of the \p AA is
942 /// AAAMDFlatWorkGroupSize
943 static bool classof(const AbstractAttribute *AA) {
944 return (AA->getIdAddr() == &ID);
945 }
946
947 /// Unique ID (due to the unique address)
948 static const char ID;
949};
950
951const char AAAMDFlatWorkGroupSize::ID = 0;
952
953AAAMDFlatWorkGroupSize &
954AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
955 Attributor &A) {
957 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
959 "AAAMDFlatWorkGroupSize is only valid for function position");
960}
961
962struct TupleDecIntegerRangeState : public AbstractState {
963 DecIntegerState<uint32_t> X, Y, Z;
964
965 bool isValidState() const override {
966 return X.isValidState() && Y.isValidState() && Z.isValidState();
967 }
968
969 bool isAtFixpoint() const override {
970 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
971 }
972
973 ChangeStatus indicateOptimisticFixpoint() override {
974 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
975 Z.indicateOptimisticFixpoint();
976 }
977
978 ChangeStatus indicatePessimisticFixpoint() override {
979 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
980 Z.indicatePessimisticFixpoint();
981 }
982
983 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
984 X ^= Other.X;
985 Y ^= Other.Y;
986 Z ^= Other.Z;
987 return *this;
988 }
989
990 bool operator==(const TupleDecIntegerRangeState &Other) const {
991 return X == Other.X && Y == Other.Y && Z == Other.Z;
992 }
993
994 TupleDecIntegerRangeState &getAssumed() { return *this; }
995 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
996};
997
// Tuple-of-decreasing-integers state wrapped for use as an abstract attribute.
// NOTE(review): AAAMDMaxNumWorkgroups below spells out its StateWrapper base
// directly instead of using this alias — this alias appears unreferenced in
// the visible code; confirm before removing.
using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1000
/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Seed each dimension from the cached per-function values; entry functions
  /// are fixed immediately since their seeded values are final.
  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  /// Clamp this function's state with each caller's state; if any call site is
  /// unknown or a caller is in an invalid state, fall to the pessimistic
  /// fixpoint.
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  /// Always (re)write the attribute as "X,Y,Z" from the assumed per-dimension
  /// values.
  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    // TODO: Should annotate loads of the group size for this to do anything
    // useful.
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /* ForceReplace= */ true);
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  /// Human-readable state dump for debugging.
  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDMaxNumWorkgroups
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};
1091
1092const char AAAMDMaxNumWorkgroups::ID = 0;
1093
1094AAAMDMaxNumWorkgroups &
1095AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1097 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1098 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1099}
1100
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  /// Seed the state from an explicit non-default attribute if present
  /// (honored as-is, state fixed optimistically). Otherwise, entry functions
  /// are fixed pessimistically since nothing refines them further.
  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // If the attribute exists, we will honor it if it is not the default.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        // Half-open [Min, Max + 1) so the inclusive bound Max stays inside.
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  /// Merge with every caller by raising both range bounds to the caller's
  /// (element-wise std::max of lower and upper bounds).
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');
      (void)Func;

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      // NOTE(review): getZExtValue() returns uint64_t and is narrowed to
      // unsigned here; safe while the APInts are 32-bit as constructed above.
      ConstantRange Assumed = getAssumed();
      unsigned Min = std::max(Assumed.getLower().getZExtValue(),
                              CallerAA->getAssumed().getLower().getZExtValue());
      unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
                              CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// Emit the deduced range unless, after clamping, it matches the default
  /// [1, max waves per EU] range.
  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
1190
1191const char AAAMDWavesPerEU::ID = 0;
1192
1193AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1194 Attributor &A) {
1196 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1197 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1198}
1199
1200/// Compute the minimum number of AGPRs required to allocate the inline asm.
1201static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1202 const CallBase &Call) {
1203 unsigned ArgNo = 0;
1204 unsigned ResNo = 0;
1205 unsigned AGPRDefCount = 0;
1206 unsigned AGPRUseCount = 0;
1207 unsigned MaxPhysReg = 0;
1208 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1209
1210 // TODO: Overestimates due to not accounting for tied operands
1211 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1212 Type *Ty = nullptr;
1213 switch (CI.Type) {
1214 case InlineAsm::isOutput: {
1215 Ty = Call.getType();
1216 if (auto *STy = dyn_cast<StructType>(Ty))
1217 Ty = STy->getElementType(ResNo);
1218 ++ResNo;
1219 break;
1220 }
1221 case InlineAsm::isInput: {
1222 Ty = Call.getArgOperand(ArgNo++)->getType();
1223 break;
1224 }
1225 case InlineAsm::isLabel:
1226 continue;
1228 // Parse the physical register reference.
1229 break;
1230 }
1231
1232 for (StringRef Code : CI.Codes) {
1233 unsigned RegCount = 0;
1234 if (Code.starts_with("a")) {
1235 // Virtual register, compute number of registers based on the type.
1236 //
1237 // We ought to be going through TargetLowering to get the number of
1238 // registers, but we should avoid the dependence on CodeGen here.
1239 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1240 } else {
1241 // Physical register reference
1242 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1243 if (Kind == 'a') {
1244 RegCount = NumRegs;
1245 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1246 }
1247
1248 continue;
1249 }
1250
1251 if (CI.Type == InlineAsm::isOutput) {
1252 // Apply tuple alignment requirement
1253 //
1254 // TODO: This is more conservative than necessary.
1255 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1256
1257 AGPRDefCount += RegCount;
1258 if (CI.isEarlyClobber) {
1259 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1260 AGPRUseCount += RegCount;
1261 }
1262 } else {
1263 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1264 AGPRUseCount += RegCount;
1265 }
1266 }
1267 }
1268
1269 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1270
1271 // TODO: This is overly conservative. If there are any physical registers,
1272 // allocate any virtual registers after them so we don't have to solve optimal
1273 // packing.
1274 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1275}
1276
1277struct AAAMDGPUMinAGPRAlloc
1278 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1279 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1280 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1281
1282 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1283 Attributor &A) {
1285 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1287 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1288 }
1289
1290 void initialize(Attributor &A) override {
1291 Function *F = getAssociatedFunction();
1292 auto [MinNumAGPR, MaxNumAGPR] =
1293 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1294 /*OnlyFirstRequired=*/true);
1295 if (MinNumAGPR == 0)
1296 indicateOptimisticFixpoint();
1297 }
1298
1299 const std::string getAsStr(Attributor *A) const override {
1300 std::string Str = "amdgpu-agpr-alloc=";
1301 raw_string_ostream OS(Str);
1302 OS << getAssumed();
1303 return OS.str();
1304 }
1305
1306 void trackStatistics() const override {}
1307
1308 ChangeStatus updateImpl(Attributor &A) override {
1309 DecIntegerState<> Maximum;
1310
1311 // Check for cases which require allocation of AGPRs. The only cases where
1312 // AGPRs are required are if there are direct references to AGPRs, so inline
1313 // assembly and special intrinsics.
1314 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1315 const auto &CB = cast<CallBase>(I);
1316 const Value *CalleeOp = CB.getCalledOperand();
1317
1318 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1319 // Technically, the inline asm could be invoking a call to an unknown
1320 // external function that requires AGPRs, but ignore that.
1321 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1322 Maximum.takeAssumedMaximum(NumRegs);
1323 return true;
1324 }
1325 switch (CB.getIntrinsicID()) {
1327 break;
1328 case Intrinsic::write_register:
1329 case Intrinsic::read_register:
1330 case Intrinsic::read_volatile_register: {
1331 const MDString *RegName = cast<MDString>(
1333 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1334 ->getOperand(0));
1335 auto [Kind, RegIdx, NumRegs] =
1337 if (Kind == 'a')
1338 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1339
1340 return true;
1341 }
1342 // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
1343 // the nocallback attribute, so the AMDGPU attributor can conservatively
1344 // drop all implicitly-known inputs and AGPR allocation information. Make
1345 // sure we still infer that no implicit inputs are required and that the
1346 // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
1347 // function which requires AGPRs, so we need to check if the called
1348 // function has the "trap-func-name" attribute.
1349 case Intrinsic::trap:
1350 case Intrinsic::debugtrap:
1351 case Intrinsic::ubsantrap:
1352 return CB.hasFnAttr(Attribute::NoCallback) ||
1353 !CB.hasFnAttr("trap-func-name");
1354 default:
1355 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1356 // required to use AGPRs.
1357 // Assume !nocallback intrinsics may call a function which requires
1358 // AGPRs.
1359 return CB.hasFnAttr(Attribute::NoCallback);
1360 }
1361
1362 // TODO: Handle callsite attributes
1363 auto *CBEdges = A.getAAFor<AACallEdges>(
1364 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1365 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1367 return false;
1368 }
1369
1370 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1371 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1372 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1373 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1375 return false;
1376 }
1377
1378 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1379 }
1380
1381 return true;
1382 };
1383
1384 bool UsedAssumedInformation = false;
1385 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1386 UsedAssumedInformation))
1387 return indicatePessimisticFixpoint();
1388
1389 return clampStateAndIndicateChange(getState(), Maximum);
1390 }
1391
1392 ChangeStatus manifest(Attributor &A) override {
1393 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1394 SmallString<4> Buffer;
1395 raw_svector_ostream OS(Buffer);
1396 OS << getAssumed();
1397
1398 return A.manifestAttrs(
1399 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1400 }
1401
1402 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1403 const char *getIdAddr() const override { return &ID; }
1404
1405 /// This function should return true if the type of the \p AA is
1406 /// AAAMDGPUMinAGPRAllocs
1407 static bool classof(const AbstractAttribute *AA) {
1408 return (AA->getIdAddr() == &ID);
1409 }
1410
1411 static const char ID;
1412};
1413
1414const char AAAMDGPUMinAGPRAlloc::ID = 0;
1415
/// An abstract attribute to propagate the function attribute
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUClusterDims.
  static bool classof(const AbstractAttribute *AA) {
    return AA->getIdAddr() == &ID;
  }

  /// Accessor for the concrete cluster-dims value tracked by the derived
  /// implementation.
  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
1444
1445const char AAAMDGPUClusterDims::ID = 0;
1446
1447struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1448 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1449 : AAAMDGPUClusterDims(IRP, A) {}
1450
1451 void initialize(Attributor &A) override {
1452 Function *F = getAssociatedFunction();
1453 assert(F && "empty associated function");
1454
1456
1457 // No matter what a kernel function has, it is final.
1458 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1459 if (Attr.isUnknown())
1460 indicatePessimisticFixpoint();
1461 else
1462 indicateOptimisticFixpoint();
1463 }
1464 }
1465
1466 const std::string getAsStr(Attributor *A) const override {
1467 if (!getAssumed() || Attr.isUnknown())
1468 return "unknown";
1469 if (Attr.isNoCluster())
1470 return "no";
1471 if (Attr.isVariableDims())
1472 return "variable";
1473 return Attr.to_string();
1474 }
1475
1476 void trackStatistics() const override {}
1477
1478 ChangeStatus updateImpl(Attributor &A) override {
1479 auto OldState = Attr;
1480
1481 auto CheckCallSite = [&](AbstractCallSite CS) {
1482 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1483 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1484 DepClassTy::REQUIRED);
1485 if (!CallerAA || !CallerAA->isValidState())
1486 return false;
1487
1488 return merge(CallerAA->getClusterDims());
1489 };
1490
1491 bool UsedAssumedInformation = false;
1492 if (!A.checkForAllCallSites(CheckCallSite, *this,
1493 /*RequireAllCallSites=*/true,
1494 UsedAssumedInformation))
1495 return indicatePessimisticFixpoint();
1496
1497 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1498 }
1499
1500 ChangeStatus manifest(Attributor &A) override {
1501 if (Attr.isUnknown())
1502 return ChangeStatus::UNCHANGED;
1503 return A.manifestAttrs(
1504 getIRPosition(),
1505 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1506 Attr.to_string())},
1507 /*ForceReplace=*/true);
1508 }
1509
1510 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1511 return Attr;
1512 }
1513
1514private:
1515 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1516 // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1517 // propagation.
1518 if (Attr.isUnknown() && Other.isUnknown())
1519 return true;
1520
1521 // Case 2: The other is determined, but we are unknown yet, we simply take
1522 // the other's value.
1523 if (Attr.isUnknown()) {
1524 Attr = Other;
1525 return true;
1526 }
1527
1528 // Case 3: We are determined but the other is unknown yet, we simply keep
1529 // everything unchanged.
1530 if (Other.isUnknown())
1531 return true;
1532
1533 // After this point, both are determined.
1534
1535 // Case 4: If they are same, we do nothing.
1536 if (Attr == Other)
1537 return true;
1538
1539 // Now they are not same.
1540
1541 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1542 // would hold), then it is unknown whether cluster will be used, and the
1543 // state is final, unlike case 1.
1544 if (Attr.isNoCluster() || Other.isNoCluster()) {
1545 Attr.setUnknown();
1546 return false;
1547 }
1548
1549 // Case 6: Both of us use cluster, but the dims are different, so the result
1550 // is, cluster is used, but we just don't have a fixed dims.
1551 Attr.setVariableDims();
1552 return true;
1553 }
1554
1555 AMDGPU::ClusterDimsAttr Attr;
1556
1557 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1558};
1559
1560AAAMDGPUClusterDims &
1561AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1563 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1564 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1565}
1566
1567static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
1568 bool DeleteFns, Module &M, AnalysisGetter &AG,
1569 TargetMachine &TM, AMDGPUAttributorOptions Options,
1570 ThinOrFullLTOPhase LTOPhase) {
1571
1572 CallGraphUpdater CGUpdater;
1574 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1575 DenseSet<const char *> Allowed(
1576 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1577 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1578 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1579 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1582 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1583
1584 AttributorConfig AC(CGUpdater);
1585 AC.IsClosedWorldModule = Options.IsClosedWorld;
1586 AC.Allowed = &Allowed;
1587 AC.IsModulePass = IsModulePass;
1588 AC.DeleteFns = DeleteFns;
1589 AC.DefaultInitializeLiveInternals = false;
1590 AC.IndirectCalleeSpecializationCallback =
1591 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1592 Function &Callee, unsigned NumAssumedCallees) {
1593 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1594 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1595 };
1596 AC.IPOAmendableCB = [](const Function &F) {
1597 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1598 };
1599
1600 Attributor A(Functions, InfoCache, AC);
1601
1602 LLVM_DEBUG({
1603 StringRef LTOPhaseStr = to_string(LTOPhase);
1604 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1605 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1606 << (AC.IsClosedWorldModule ? "" : "not ")
1607 << "assumed to be a closed world.\n";
1608 });
1609
1610 for (auto *F : Functions) {
1611 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1612 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1613 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1614 CallingConv::ID CC = F->getCallingConv();
1615 if (!AMDGPU::isEntryFunctionCC(CC)) {
1616 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1617 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1618 }
1619
1620 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1621 if (!F->isDeclaration() && ST.hasClusters())
1622 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1623
1624 if (ST.hasGFX90AInsts())
1625 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1626
1627 for (auto &I : instructions(F)) {
1628 Value *Ptr = nullptr;
1629 if (auto *LI = dyn_cast<LoadInst>(&I))
1630 Ptr = LI->getPointerOperand();
1631 else if (auto *SI = dyn_cast<StoreInst>(&I))
1632 Ptr = SI->getPointerOperand();
1633 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1634 Ptr = RMW->getPointerOperand();
1635 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1636 Ptr = CmpX->getPointerOperand();
1637
1638 if (Ptr) {
1639 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1640 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1641 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1642 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1643 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1644 }
1645 }
1646 }
1647 }
1648
1649 return A.run() == ChangeStatus::CHANGED;
1650}
1651} // namespace
1652
1655
1658 AnalysisGetter AG(FAM);
1659
1660 SetVector<Function *> Functions;
1661 for (Function &F : M) {
1662 if (!F.isIntrinsic())
1663 Functions.insert(&F);
1664 }
1665
1666 // TODO: Probably preserves CFG
1667 return runImpl(Functions, /*IsModulePass=*/true, /*DeleteFns=*/true, M, AG,
1668 TM, Options, LTOPhase)
1671}
1672
1675 LazyCallGraph &CG,
1676 CGSCCUpdateResult &UR) {
1677
1679 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
1680 AnalysisGetter AG(FAM);
1681
1682 SetVector<Function *> Functions;
1683 for (LazyCallGraph::Node &N : C) {
1684 Function *F = &N.getFunction();
1685 if (!F->isIntrinsic())
1686 Functions.insert(F);
1687 }
1688
1690 Module *M = C.begin()->getFunction().getParent();
1691 // In the CGSCC pipeline, avoid untracked call graph modifications by
1692 // disabling function deletion, mirroring the generic AttributorCGSCCPass.
1693 return runImpl(Functions, /*IsModulePass=*/false, /*DeleteFns=*/false, *M, AG,
1697}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool hasSanitizerAttributes(const Function &F)
Returns true if sanitizer attributes are present on a function.
ImplicitArgumentMask
@ UNKNOWN_INTRINSIC
@ NOT_IMPLICIT_INPUT
@ ALL_ARGUMENT_MASK
ImplicitArgumentPositions
@ LAST_ARG_POS
static bool castRequiresQueuePtr(unsigned SrcAS)
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
DXIL Resource Access
@ Default
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
AMD GCN specific subclass of TargetSubtarget.
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
Basic Register Allocator
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ClusterDimsAttr get(const Function &F)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
A proxy from a FunctionAnalysisManager to an SCC.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A node in the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:278
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define N
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.