LLVM 23.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
25#include "llvm/IR/Module.h"
30#include "llvm/Support/BLAKE3.h"
32#include "llvm/Support/Debug.h"
36#include <map>
37#include <set>
38
39using namespace llvm;
40using namespace llvm::memprof;
41
42#define DEBUG_TYPE "memprof"
43
44namespace llvm {
49} // namespace llvm
50
51// By default disable matching of allocation profiles onto operator new that
52// already explicitly pass a hot/cold hint, since we don't currently
53// override these hints anyway.
55 "memprof-match-hot-cold-new",
57 "Match allocation profiles onto existing hot/cold operator new calls"),
58 cl::Hidden, cl::init(false));
59
// Hidden flag -memprof-print-match-info (off by default): requests printing of
// matching stats for each allocation context in this module's profiles.
60static cl::opt<bool>
61 ClPrintMemProfMatchInfo("memprof-print-match-info",
62 cl::desc("Print matching stats for each allocation "
63 "context in this module's profiles"),
64 cl::Hidden, cl::init(false));
65
67 "memprof-print-matched-alloc-stack",
68 cl::desc("Print full stack context for matched "
69 "allocations with -memprof-print-match-info."),
70 cl::Hidden, cl::init(false));
71
// Hidden flag -memprof-print-function-guids (off by default): requests
// printing of the function GUIDs computed for matching.
72static cl::opt<bool>
73 PrintFunctionGuids("memprof-print-function-guids",
74 cl::desc("Print function GUIDs computed for matching"),
75 cl::Hidden, cl::init(false));
76
// Hidden flag -memprof-salvage-stale-profile (off by default): requests
// salvaging of a stale MemProf profile.
// NOTE(review): presumably this gates the "undrift" logic later in the file;
// the lines that read this flag are not visible in this listing - confirm.
77static cl::opt<bool>
78 SalvageStaleProfile("memprof-salvage-stale-profile",
79 cl::desc("Salvage stale MemProf profile"),
80 cl::init(false), cl::Hidden);
81
83 "memprof-attach-calleeguids",
85 "Attach calleeguids as value profile metadata for indirect calls."),
86 cl::init(true), cl::Hidden);
87
89 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
90 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
91
93 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
94 cl::desc("If true, annotate the static data section prefix"));
95
96// Matching statistics. The counters below fall into three groups:
// per-function profile lookup outcomes (missing / hash mismatch / valid),
// allocation-context and callsite counts (present in the profile vs. matched),
// and global variables by section-prefix annotation outcome.
97STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
98STATISTIC(NumOfMemProfMismatch,
99 "Number of functions having mismatched memory profile hash.");
100STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
101STATISTIC(NumOfMemProfAllocContextProfiles,
102 "Number of alloc contexts in memory profile.");
103STATISTIC(NumOfMemProfCallSiteProfiles,
104 "Number of callsites in memory profile.");
105STATISTIC(NumOfMemProfMatchedAllocContexts,
106 "Number of matched memory profile alloc contexts.");
107STATISTIC(NumOfMemProfMatchedAllocs,
108 "Number of matched memory profile allocs.");
109STATISTIC(NumOfMemProfMatchedCallSites,
110 "Number of matched memory profile callsites.");
111STATISTIC(NumOfMemProfHotGlobalVars,
112 "Number of global vars annotated with 'hot' section prefix.");
113STATISTIC(NumOfMemProfColdGlobalVars,
114 "Number of global vars annotated with 'unlikely' section prefix.");
115STATISTIC(NumOfMemProfUnknownGlobalVars,
116 "Number of global vars with unknown hotness (no section prefix).");
117STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
118 "Number of global vars with user-specified section (not annotated).");
119
121 ArrayRef<uint64_t> InlinedCallStack,
122 LLVMContext &Ctx) {
123 I.setMetadata(LLVMContext::MD_callsite,
124 buildCallstackMetadata(InlinedCallStack, Ctx));
125}
126
128 uint32_t Column) {
131 HashBuilder.add(Function, LineOffset, Column);
133 uint64_t Id;
134 std::memcpy(&Id, Hash.data(), sizeof(Hash));
135 return Id;
136}
137
141
143 return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
144 AllocInfo->Info.getAllocCount(),
145 AllocInfo->Info.getTotalLifetime());
146}
147
150 uint64_t FullStackId) {
151 SmallVector<uint64_t> StackIds;
152 for (const auto &StackFrame : AllocInfo->CallStack)
153 StackIds.push_back(computeStackId(StackFrame));
155 std::vector<ContextTotalSize> ContextSizeInfo;
157 auto TotalSize = AllocInfo->Info.getTotalSize();
158 assert(TotalSize);
159 assert(FullStackId != 0);
160 ContextSizeInfo.push_back({FullStackId, TotalSize});
161 }
162 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
163 return AllocType;
164}
165
166// Return true if InlinedCallStack, computed from a call instruction's debug
167// info, is a prefix of ProfileCallStack, a list of Frames from profile data
168// (either the allocation data or a callsite).
169static bool
171 ArrayRef<uint64_t> InlinedCallStack) {
172 return ProfileCallStack.size() >= InlinedCallStack.size() &&
173 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
174 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
175 return computeStackId(F) == StackId;
176 });
177}
178
// Returns true when Callee is recognized by TLI as one of the plain
// operator new / size-returning new library functions listed in the first
// case group below, i.e. an allocation function for which a hot/cold variant
// exists. Returns false when Callee is null or when TLI does not recognize it
// as a library function.
179static bool isAllocationWithHotColdVariant(const Function *Callee,
180 const TargetLibraryInfo &TLI) {
181 if (!Callee)
182 return false;
183 LibFunc Func;
184 if (!TLI.getLibFunc(*Callee, Func))
185 return false;
186 switch (Func) {
// Allocation functions that do not themselves take a hot/cold hint.
187 case LibFunc_Znwm:
188 case LibFunc_ZnwmRKSt9nothrow_t:
189 case LibFunc_ZnwmSt11align_val_t:
190 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
191 case LibFunc_Znam:
192 case LibFunc_ZnamRKSt9nothrow_t:
193 case LibFunc_ZnamSt11align_val_t:
194 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
195 case LibFunc_size_returning_new:
196 case LibFunc_size_returning_new_aligned:
197 return true;
// Variants that already take an explicit __hot_cold_t hint.
198 case LibFunc_Znwm12__hot_cold_t:
199 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
200 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
201 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
202 case LibFunc_Znam12__hot_cold_t:
203 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
204 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
205 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
206 case LibFunc_size_returning_new_hot_cold:
207 case LibFunc_size_returning_new_aligned_hot_cold:
// NOTE(review): this listing jumps from source line 207 to 209, so the
// statement that handled the hot/cold variants is not visible; as shown they
// fall through to `default`. Presumably the missing line consults the
// -memprof-match-hot-cold-new option - confirm against the upstream file.
209 default:
210 return false;
211 }
212}
213
215 AnnotationKind Kind) {
217 "Should not handle AnnotationOK here");
218 SmallString<32> Reason;
219 switch (Kind) {
221 ++NumOfMemProfExplicitSectionGlobalVars;
222 Reason.append("explicit section name");
223 break;
225 Reason.append("linker declaration");
226 break;
228 Reason.append("name starts with `llvm.`");
229 break;
230 default:
231 llvm_unreachable("Unexpected annotation kind");
232 }
233 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
234 << Reason << ".\n");
235}
236
237// Computes the LLVM version of MD5 hash for the content of a string
238// literal.
239static std::optional<uint64_t>
241 auto *Initializer = GVar.getInitializer();
242 if (!Initializer)
243 return std::nullopt;
244 if (auto *C = dyn_cast<ConstantDataSequential>(Initializer))
245 if (C->isString()) {
246 // Note the hash computed for the literal would include the null byte.
247 return llvm::MD5Hash(C->getAsString());
248 }
249 return std::nullopt;
250}
251
252// Structure for tracking info about matched allocation contexts for use with
253// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
255 // Total size in bytes of matched context.
257 // Matched allocation's type.
259 // Number of frames matched to the allocation itself (values will be >1 in
260 // cases where allocation was already inlined). Use a set because there can
261 // be multiple inlined instances and each may have a different inline depth.
262 // Use std::set to iterate in sorted order when printing.
263 std::set<unsigned> MatchedFramesSet;
264 // The full call stack of the allocation, for cases where requested via
265 // -memprof-print-matched-alloc-stack.
266 std::vector<Frame> CallStack;
267
268 // Caller responsible for inserting the matched frames and the call stack when
269 // appropriate.
272};
273
276 function_ref<bool(uint64_t)> IsPresentInProfile) {
278
279 auto GetOffset = [](const DILocation *DIL) {
280 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
281 0xffff;
282 };
283
284 for (Function &F : M) {
285 if (F.isDeclaration())
286 continue;
287
288 for (auto &BB : F) {
289 for (auto &I : BB) {
291 continue;
292
293 auto *CB = dyn_cast<CallBase>(&I);
294 auto *CalledFunction = CB->getCalledFunction();
295 // Disregard indirect calls and intrinsics.
296 if (!CalledFunction || CalledFunction->isIntrinsic())
297 continue;
298
299 StringRef CalleeName = CalledFunction->getName();
300 // True if we are calling a heap allocation function that supports
301 // hot/cold variants.
302 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
303 // True for the first iteration below, indicating that we are looking at
304 // a leaf node.
305 bool IsLeaf = true;
306 for (const DILocation *DIL = I.getDebugLoc(); DIL;
307 DIL = DIL->getInlinedAt()) {
308 StringRef CallerName = DIL->getSubprogramLinkageName();
309 assert(!CallerName.empty() &&
310 "Be sure to enable -fdebug-info-for-profiling");
311 uint64_t CallerGUID = memprof::getGUID(CallerName);
312 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
313 // Pretend that we are calling a function with GUID == 0 if we are
314 // in the inline stack leading to a heap allocation function.
315 if (IsAlloc) {
316 if (IsLeaf) {
317 // For leaf nodes, set CalleeGUID to 0 without consulting
318 // IsPresentInProfile.
319 CalleeGUID = 0;
320 } else if (!IsPresentInProfile(CalleeGUID)) {
321 // In addition to the leaf case above, continue to set CalleeGUID
322 // to 0 as long as we don't see CalleeGUID in the profile.
323 CalleeGUID = 0;
324 } else {
325 // Once we encounter a callee that exists in the profile, stop
326 // setting CalleeGUID to 0.
327 IsAlloc = false;
328 }
329 }
330
331 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
332 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
333 CalleeName = CallerName;
334 IsLeaf = false;
335 }
336 }
337 }
338 }
339
340 // Sort each call list by the source location.
341 for (auto &[CallerGUID, CallList] : Calls) {
342 llvm::sort(CallList);
343 CallList.erase(llvm::unique(CallList), CallList.end());
344 }
345
346 return Calls;
347}
348
351 const TargetLibraryInfo &TLI) {
353
355 MemProfReader->getMemProfCallerCalleePairs();
357 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
358 return CallsFromProfile.contains(GUID);
359 });
360
361 // Compute an undrift map for each CallerGUID.
362 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
363 auto It = CallsFromProfile.find(CallerGUID);
364 if (It == CallsFromProfile.end())
365 continue;
366 const auto &ProfileAnchors = It->second;
367
368 LocToLocMap Matchings;
370 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
371 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
372 [[maybe_unused]] bool Inserted =
373 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
374
375 // The insertion must succeed because we visit each GUID exactly once.
376 assert(Inserted);
377 }
378
379 return UndriftMaps;
380}
381
382// Given a MemProfRecord, undrift all the source locations present in the
383// record in place.
384static void
386 memprof::MemProfRecord &MemProfRec) {
387 // Undrift a call stack in place.
388 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
389 for (auto &F : CallStack) {
390 auto I = UndriftMaps.find(F.Function);
391 if (I == UndriftMaps.end())
392 continue;
393 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
394 if (J == I->second.end())
395 continue;
396 auto &NewLoc = J->second;
397 F.LineOffset = NewLoc.LineOffset;
398 F.Column = NewLoc.Column;
399 }
400 };
401
402 for (auto &AS : MemProfRec.AllocSites)
403 UndriftCallStack(AS.CallStack);
404
405 for (auto &CS : MemProfRec.CallSites)
406 UndriftCallStack(CS.Frames);
407}
408
409// Helper function to process CalleeGuids and create value profile metadata
411 ArrayRef<GlobalValue::GUID> CalleeGuids) {
412 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
413 return;
414
415 // Prepare the vector of value data, initializing from any existing
416 // value-profile metadata present on the instruction so that we merge the
417 // new CalleeGuids into the existing entries.
419 uint64_t TotalCount = 0;
420
421 if (I.getMetadata(LLVMContext::MD_prof)) {
422 // Read all existing entries so we can merge them. Use a large
423 // MaxNumValueData to retrieve all existing entries.
424 VDs = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
425 /*MaxNumValueData=*/UINT32_MAX, TotalCount);
426 }
427
428 // Save the original size for use later in detecting whether any were added.
429 const size_t OriginalSize = VDs.size();
430
431 // Initialize the set of existing guids with the original list.
432 DenseSet<uint64_t> ExistingValues(
435 VDs, [](const InstrProfValueData &Entry) { return Entry.Value; }));
436
437 // Merge CalleeGuids into list of existing VDs, by appending any that are not
438 // already included.
439 VDs.reserve(OriginalSize + CalleeGuids.size());
440 for (auto G : CalleeGuids) {
441 if (!ExistingValues.insert(G).second)
442 continue;
443 InstrProfValueData NewEntry;
444 NewEntry.Value = G;
445 // For MemProf, we don't have actual call counts, so we assign
446 // a weight of 1 to each potential target.
447 // TODO: Consider making this weight configurable or increasing it to
448 // improve effectiveness for ICP.
449 NewEntry.Count = 1;
450 TotalCount += NewEntry.Count;
451 VDs.push_back(NewEntry);
452 }
453
454 // Update the VP metadata if we added any new callee GUIDs to the list.
455 assert(VDs.size() >= OriginalSize);
456 if (VDs.size() == OriginalSize)
457 return;
458
459 // First clear the existing !prof.
460 I.setMetadata(LLVMContext::MD_prof, nullptr);
461
462 // No need to sort the updated VDs as all appended entries have the same count
463 // of 1, which is no larger than any existing entries. The incoming list of
464 // CalleeGuids should already be deterministic for a given profile.
465 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, VDs.size());
466}
467
// Matches the allocation call CI (passed alongside I by the caller) against
// the profiled allocation contexts in AllocInfoSet and annotates the call.
//
// Steps, as visible below:
//  1. Deduplicate AllocInfoSet by full stack id, replacing an existing entry
//     unless the size comparison says to keep it.
//  2. For each remaining context that passes the match check, add it to a
//     CallStackTrie, accumulate its total size, optionally record it in
//     FullStackIdToAllocMatchInfo, and emit an optimization remark via ORE.
//  3. If not all matched bytes are cold, MinMatchedColdBytePercent is below
//     100, and the cold bytes reach that percentage of the matched bytes,
//     attach a single cold attribute ("dominant") and return.
//  4. Otherwise, if the trie is non-empty, build and attach the MIB metadata
//     (or a function attribute); when !memprof metadata was attached, also
//     add !callsite metadata built from InlinedCallStack using Ctx.
//
// InlinedCallStack holds the stack ids computed for this call; MaxColdSize is
// forwarded to the CallStackTrie constructor.
//
// NOTE(review): source lines 489, 503, 508, 512 and 529 are absent from this
// listing, so the tail of the size comparison, the match-check call, the guard
// on the cold-size accumulation, the guard on match-info recording, and one
// remark operand are not visible here - confirm against the upstream file.
468static void handleAllocSite(
469 Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
470 LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
471 const std::set<const AllocationInfo *> &AllocInfoSet,
472 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
473 // TODO: Remove this once the profile creation logic deduplicates contexts
474 // that are the same other than the IsInlineFrame bool. Until then, keep the
475 // largest.
476 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
477 for (auto *AllocInfo : AllocInfoSet) {
478 auto FullStackId = computeFullStackId(AllocInfo->CallStack);
479 auto [It, Inserted] =
480 UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
481 // If inserted entry, done.
482 if (Inserted)
483 continue;
484 // Keep the larger one, or the noncold one if they are the same size.
485 auto CurSize = It->second->Info.getTotalSize();
486 auto NewSize = AllocInfo->Info.getTotalSize();
487 if ((CurSize > NewSize) ||
488 (CurSize == NewSize &&
490 continue;
491 It->second = AllocInfo;
492 }
493 // We may match this instruction's location list to multiple MIB
494 // contexts. Add them to a Trie specialized for trimming the contexts to
495 // the minimal needed to disambiguate contexts with unique behavior.
496 CallStackTrie AllocTrie(&ORE, MaxColdSize);
// Byte totals over all matched contexts; used for the cold-percentage check
// after the loop.
497 uint64_t TotalSize = 0;
498 uint64_t TotalColdSize = 0;
499 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
500 // Check the full inlined call stack against this one.
501 // If we found and thus matched all frames on the call, include
502 // this MIB.
504 InlinedCallStack)) {
505 NumOfMemProfMatchedAllocContexts++;
506 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
507 TotalSize += AllocInfo->Info.getTotalSize();
509 TotalColdSize += AllocInfo->Info.getTotalSize();
510 // Record information about the allocation if match info printing
511 // was requested.
513 assert(FullStackId != 0);
514 auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
515 FullStackId,
516 AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
517 // Always insert the new matched frame count, since it may differ.
518 Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
519 if (Inserted && PrintMatchedAllocStack)
520 Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
521 AllocInfo->CallStack.begin(),
522 AllocInfo->CallStack.end());
523 }
524 ORE.emit(
525 OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
526 << ore::NV("AllocationCall", CI) << " in function "
527 << ore::NV("Caller", CI->getFunction())
528 << " matched alloc context with alloc type "
530 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
531 << " full context id " << ore::NV("Context", FullStackId)
532 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
533 }
534 }
535 // If the threshold for the percent of cold bytes is less than 100%,
536 // and not all bytes are cold, see if we should still hint this
537 // allocation as cold without context sensitivity.
538 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
539 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
540 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
541 return;
542 }
543
544 // We might not have matched any to the full inlined call stack.
545 // But if we did, create and attach metadata, or a function attribute if
546 // all contexts have identical profiled behavior.
547 if (!AllocTrie.empty()) {
548 NumOfMemProfMatchedAllocs++;
549 // MemprofMDAttached will be false if a function attribute was
550 // attached.
551 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
552 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
553 if (MemprofMDAttached) {
554 // Add callsite metadata for the instruction's location list so that
555 // it is simpler later on to identify which part of the MIB contexts
556 // are from this particular instruction (including during inlining,
557 // when the callsite metadata will be updated appropriately).
558 // FIXME: can this be changed to strip out the matching stack
559 // context ids from the MIB contexts and not add any callsite
560 // metadata here to save space?
561 addCallsiteMetadata(I, InlinedCallStack, Ctx);
562 }
563 }
564}
565
566// Helper struct for maintaining refs to callsite data. As an alternative we
567// could store a pointer to the CallSiteInfo struct but we also need the frame
568// index. Using ArrayRefs instead makes it a little easier to read.
570 // Subset of frames for the corresponding CallSiteInfo.
572 // Potential targets for indirect calls.
574};
575
// Annotates the (non-allocation) call I using the profiled CallSiteEntries
// that share its leaf location.
//
// For the first entry that passes the match check, !callsite metadata built
// from InlinedCallStack is attached and an optimization remark is emitted.
// For a direct call (CalledFunction non-null) the loop stops there. For an
// indirect call the loop keeps visiting entries, and the accumulated
// CalleeGuids are handed to addVPMetadata after the loop. MatchedCallSites
// receives a copy of InlinedCallStack for later reporting.
//
// NOTE(review): source lines 581, 587, 592, 601, 625 and 626 are absent from
// this listing, so the final parameter (an OptimizationRemarkEmitter &ORE,
// per the uses below), the CalleeGuids declaration, the match-check call, the
// guard on MatchedCallSites recording, and the callee-GUID collection loop are
// not visible here - confirm against the upstream file.
576static void handleCallSite(Instruction &I, const Function *CalledFunction,
577 ArrayRef<uint64_t> InlinedCallStack,
578 const std::vector<CallSiteEntry> &CallSiteEntries,
579 Module &M,
580 std::set<std::vector<uint64_t>> &MatchedCallSites,
582 auto &Ctx = M.getContext();
583 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
584 // to support cases where the instruction's inlined frames match multiple call
585 // site entries, which can happen if the profile was collected from a binary
586 // where this instruction was eventually inlined into multiple callers.
// Ensures the !callsite metadata and remark are produced only once even when
// several entries match (indirect-call case).
588 bool CallsiteMDAdded = false;
589 for (const auto &CallSiteEntry : CallSiteEntries) {
590 // If we found and thus matched all frames on the call, create and
591 // attach call stack metadata.
593 InlinedCallStack)) {
594 NumOfMemProfMatchedCallSites++;
595 // Only need to find one with a matching call stack and add a single
596 // callsite metadata.
597 if (!CallsiteMDAdded) {
598 addCallsiteMetadata(I, InlinedCallStack, Ctx);
599
600 // Accumulate call site matching information upon request.
602 std::vector<uint64_t> CallStack;
603 append_range(CallStack, InlinedCallStack);
604 MatchedCallSites.insert(std::move(CallStack));
605 }
606 OptimizationRemark Remark(DEBUG_TYPE, "MemProfUse", &I);
607 Remark << ore::NV("CallSite", &I) << " in function "
608 << ore::NV("Caller", I.getFunction())
609 << " matched callsite with frame count "
610 << ore::NV("Frames", InlinedCallStack.size())
611 << " and stack ids";
612 for (uint64_t StackId : InlinedCallStack)
613 Remark << " " << ore::NV("StackId", StackId);
614 ORE.emit(Remark);
615
616 // If this is a direct call, we're done.
617 if (CalledFunction)
618 break;
619 CallsiteMDAdded = true;
620 }
621
622 assert(!CalledFunction && "Didn't expect direct call");
623
624 // Collect Callee GUIDs from all matching CallSiteEntries.
627 }
628 }
629 // Try to attach indirect call metadata if possible.
630 addVPMetadata(M, I, CalleeGuids.getArrayRef());
631}
632
633static void
635 const TargetLibraryInfo &TLI,
636 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
637 std::set<std::vector<uint64_t>> &MatchedCallSites,
639 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
640 auto &Ctx = M.getContext();
641 // Previously we used getIRPGOFuncName() here. If F has local linkage,
642 // getIRPGOFuncName() returns FuncName with the prefix 'FileName;'. But
643 // llvm-profdata uses the FuncName in DWARF to create the GUID, which doesn't
644 // contain the FileName prefix. As a result, local linkage functions could
645 // not find their MemProfRecord. So we use getName() now.
646 // 'unique-internal-linkage-names' can make MemProf work better for local
647 // linkage functions.
648 auto FuncName = F.getName();
649 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
651 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
652 << "\n";
653 std::optional<memprof::MemProfRecord> MemProfRec;
654 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
655 if (Err) {
656 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
657 auto Err = IPE.get();
658 bool SkipWarning = false;
659 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
660 << ": ");
662 NumOfMemProfMissing++;
663 SkipWarning = !PGOWarnMissing;
664 LLVM_DEBUG(dbgs() << "unknown function");
665 } else if (Err == instrprof_error::hash_mismatch) {
666 NumOfMemProfMismatch++;
667 SkipWarning =
670 (F.hasComdat() ||
672 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
673 }
674
675 if (SkipWarning)
676 return;
677
678 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
679 Twine(" Hash = ") + std::to_string(FuncGUID))
680 .str();
681
682 Ctx.diagnose(
683 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
684 });
685 return;
686 }
687
688 NumOfMemProfFunc++;
689
690 // If requested, undrift MemProfRecord so that the source locations in it
691 // match those in the IR.
693 undriftMemProfRecord(UndriftMaps, *MemProfRec);
694
695 // Detect if there are non-zero column numbers in the profile. If not,
696 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
697 // columns in the IR). The profiled binary might have been built with
698 // column numbers disabled, for example.
699 bool ProfileHasColumns = false;
700
701 // Build maps of the location hash to all profile data with that leaf location
702 // (allocation info and the callsites).
703 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
704
705 // For the callsites we need to record slices of the frame array (see comments
706 // below where the map entries are added) along with their CalleeGuids.
707 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
708 for (auto &AI : MemProfRec->AllocSites) {
709 NumOfMemProfAllocContextProfiles++;
710 // Associate the allocation info with the leaf frame. The later matching
711 // code will match any inlined call sequences in the IR with a longer prefix
712 // of call stack frames.
713 uint64_t StackId = computeStackId(AI.CallStack[0]);
714 LocHashToAllocInfo[StackId].insert(&AI);
715 ProfileHasColumns |= AI.CallStack[0].Column;
716 }
717 for (auto &CS : MemProfRec->CallSites) {
718 NumOfMemProfCallSiteProfiles++;
719 // Need to record all frames from leaf up to and including this function,
720 // as any of these may or may not have been inlined at this point.
721 unsigned Idx = 0;
722 for (auto &StackFrame : CS.Frames) {
723 uint64_t StackId = computeStackId(StackFrame);
724 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
725 // The callee guids for the slice containing all frames (due to the
726 // increment above Idx is now 1) comes from the CalleeGuids recorded in
727 // the CallSite. For the slices not containing the leaf-most frame, the
728 // callee guid is simply the function GUID of the prior frame.
729 LocHashToCallSites[StackId].push_back(
730 {FrameSlice, (Idx == 1 ? CS.CalleeGuids
732 CS.Frames[Idx - 2].Function))});
733
734 ProfileHasColumns |= StackFrame.Column;
735 // Once we find this function, we can stop recording.
736 if (StackFrame.Function == FuncGUID)
737 break;
738 }
739 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
740 }
741
742 auto GetOffset = [](const DILocation *DIL) {
743 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
744 0xffff;
745 };
746
747 // Now walk the instructions, looking up the associated profile data using
748 // debug locations.
749 for (auto &BB : F) {
750 for (auto &I : BB) {
751 if (I.isDebugOrPseudoInst())
752 continue;
753 // We are only interested in calls (allocation or interior call stack
754 // context calls).
755 auto *CI = dyn_cast<CallBase>(&I);
756 if (!CI)
757 continue;
758 auto *CalledFunction = CI->getCalledFunction();
759 if (CalledFunction && CalledFunction->isIntrinsic())
760 continue;
761 // List of call stack ids computed from the location hashes on debug
762 // locations (leaf to inlined at root).
763 SmallVector<uint64_t, 8> InlinedCallStack;
764 // Was the leaf location found in one of the profile maps?
765 bool LeafFound = false;
766 // If leaf was found in a map, iterators pointing to its location in both
767 // of the maps. It might exist in neither, one, or both (the latter case
768 // can happen because we don't currently have discriminators to
769 // distinguish the case when a single line/col maps to both an allocation
770 // and another callsite).
771 auto AllocInfoIter = LocHashToAllocInfo.end();
772 auto CallSitesIter = LocHashToCallSites.end();
773 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
774 DIL = DIL->getInlinedAt()) {
775 // Use C++ linkage name if possible. Need to compile with
776 // -fdebug-info-for-profiling to get linkage name.
777 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
778 if (Name.empty())
779 Name = DIL->getScope()->getSubprogram()->getName();
780 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
781 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
782 ProfileHasColumns ? DIL->getColumn() : 0);
783 // Check if we have found the profile's leaf frame. If yes, collect
784 // the rest of the call's inlined context starting here. If not, see if
785 // we find a match further up the inlined context (in case the profile
786 // was missing debug frames at the leaf).
787 if (!LeafFound) {
788 AllocInfoIter = LocHashToAllocInfo.find(StackId);
789 CallSitesIter = LocHashToCallSites.find(StackId);
790 if (AllocInfoIter != LocHashToAllocInfo.end() ||
791 CallSitesIter != LocHashToCallSites.end())
792 LeafFound = true;
793 }
794 if (LeafFound)
795 InlinedCallStack.push_back(StackId);
796 }
797 // If leaf not in either of the maps, skip inst.
798 if (!LeafFound)
799 continue;
800
801 // First add !memprof metadata from allocation info, if we found the
802 // instruction's leaf location in that map, and if the rest of the
803 // instruction's locations match the prefix Frame locations on an
804 // allocation context with the same leaf.
805 if (AllocInfoIter != LocHashToAllocInfo.end() &&
806 // Only consider allocations which support hinting.
807 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
808 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
809 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
810 else if (CallSitesIter != LocHashToCallSites.end())
811 // Otherwise, add callsite metadata. If we reach here then we found the
812 // instruction's leaf location in the callsites map and not the
813 // allocation map.
814 handleCallSite(I, CalledFunction, InlinedCallStack,
815 CallSitesIter->second, M, MatchedCallSites, ORE);
816 }
817 }
818}
819
// Constructs the pass from the path of the memory profile file and a file
// system handle. When the supplied FS is null, the real file system is used
// instead.
// NOTE(review): source line 821 (the declaration of the FS parameter) is
// absent from this listing - confirm its exact type against upstream.
820MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
822 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
823 if (!FS)
824 this->FS = vfs::getRealFileSystem();
825}
826
828 // Return immediately if the module doesn't contain any function or global
829 // variables.
830 if (M.empty() && M.globals().empty())
831 return PreservedAnalyses::all();
832
833 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
834 auto &Ctx = M.getContext();
835 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
836 if (Error E = ReaderOrErr.takeError()) {
837 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
838 Ctx.diagnose(
839 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
840 });
841 return PreservedAnalyses::all();
842 }
843
844 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
845 std::move(ReaderOrErr.get());
846 if (!MemProfReader) {
847 Ctx.diagnose(DiagnosticInfoPGOProfile(
848 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
849 return PreservedAnalyses::all();
850 }
851
852 if (!MemProfReader->hasMemoryProfile()) {
853 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
854 "Not a memory profile"));
855 return PreservedAnalyses::all();
856 }
857
858 const bool Changed =
859 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
860
861 // If the module doesn't contain any function, return after we process all
862 // global variables.
863 if (M.empty())
865
866 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
867
868 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
871 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
872
873 // Map from the stack hash of each matched allocation context in the function
874 // profiles to match info such as the total profiled size (bytes), allocation
875 // type, number of frames matched to the allocation itself, and the full array
876 // of call stack ids.
877 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
878
879 // Set of the matched call sites, each expressed as a sequence of an inline
880 // call stack.
881 std::set<std::vector<uint64_t>> MatchedCallSites;
882
883 uint64_t MaxColdSize = 0;
884 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
885 MaxColdSize = MemProfSum->getMaxColdTotalSize();
886
887 for (auto &F : M) {
888 if (F.isDeclaration())
889 continue;
890
891 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
892 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
893 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
894 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
895 }
896
898 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
899 for (auto Frames : Info.MatchedFramesSet) {
900 // TODO: To reduce verbosity, should we change the existing message
901 // so that we emit a list of matched frame counts in a single message
902 // about the context (instead of one message per frame count)?
903 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
904 << " context with id " << Id << " has total profiled size "
905 << Info.TotalSize << " is matched with " << Frames << " frames";
907 errs() << " and call stack";
908 for (auto &F : Info.CallStack)
909 errs() << " " << computeStackId(F);
910 }
911 errs() << "\n";
912 }
913 }
914
915 for (const auto &CallStack : MatchedCallSites) {
916 errs() << "MemProf callsite match for inline call stack";
917 for (uint64_t StackId : CallStack)
918 errs() << " " << StackId;
919 errs() << "\n";
920 }
921 }
922
924}
925
// Annotates the global variables of M with a section prefix derived from the
// data access profile, and returns whether anything was changed.
//
// Returns false immediately when AnnotateStaticDataSectionPrefix is off or M
// has no globals. Otherwise a module flag "EnableDataAccessProf" is added:
// 0 when DataAccessProf is null (in which case the function also returns
// false), 1 when a profile is available. With a profile, each global is
// looked up by handle and given the prefix "hot" when its record has a
// non-zero AccessCount, "unlikely" when the profile reports it as a known
// cold symbol, and no prefix otherwise.
926bool MemProfUsePass::annotateGlobalVariables(
927 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
928 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
929 return false;
930
931 if (!DataAccessProf) {
932 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
933 // FIXME: Add a diagnostic message without failing the compilation when
934 // data access profile payload is not available.
935 return false;
936 }
937 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
938
939 bool Changed = false;
940 // Iterate all global variables in the module and annotate them based on
941 // data access profiles. Note it's up to the linker to decide how to map input
942 // sections to output sections, and one conservative practice is to map
943 // unlikely-prefixed ones to unlikely output section, and map the rest
944 // (hot-prefixed or prefix-less) to the canonical output section.
945 for (GlobalVariable &GVar : M.globals()) {
946 assert(!GVar.getSectionPrefix().has_value() &&
947 "GVar shouldn't have section prefix yet");
948 auto Kind = llvm::memprof::getAnnotationKind(GVar);
// NOTE(review): source lines 949-950 are absent from this listing; presumably
// they test Kind and report unsupported kinds before the `continue` below -
// confirm against the upstream file.
951 continue;
952 }
953
954 StringRef Name = GVar.getName();
// Default to looking the variable up by name; string literals switch to a
// content-hash handle further down.
955 SymbolHandleRef Handle = SymbolHandleRef(Name);
956 // Skip string literals as their mangled names don't stay stable across
957 // binary releases.
// NOTE(review): source line 958 is absent from this listing; presumably it
// guards the check below with an option, since otherwise the string-literal
// handling that follows would be unreachable - confirm against upstream.
959 if (Name.starts_with(".str"))
960 continue;
961
962 if (Name.starts_with(".str")) {
963 std::optional<uint64_t> Hash = getStringContentHash(GVar);
964 if (!Hash) {
965 LLVM_DEBUG(dbgs() << "Cannot compute content hash for string literal "
966 << Name << "\n");
967 continue;
968 }
969 Handle = SymbolHandleRef(Hash.value());
970 }
971
972 // DataAccessProfRecord's get* methods will canonicalize the name under the
973 // hood before looking it up, so optimizer doesn't need to do it.
974 std::optional<DataAccessProfRecord> Record =
975 DataAccessProf->getProfileRecord(Handle);
976 // Annotate a global variable as hot if it has non-zero sampled count, and
977 // annotate it as cold if it's seen in the profiled binary
978 // file but doesn't have any access sample.
979 // For logging, optimization remark emitter requires a llvm::Function, but
980 // it's not well defined how to associate a global variable with a function.
981 // So we just print out the static data section prefix in LLVM_DEBUG.
982 if (Record && Record->AccessCount > 0) {
983 ++NumOfMemProfHotGlobalVars;
984 Changed |= GVar.setSectionPrefix("hot");
985 LLVM_DEBUG(dbgs() << "Global variable " << Name
986 << " is annotated as hot\n");
987 } else if (DataAccessProf->isKnownColdSymbol(Handle)) {
988 ++NumOfMemProfColdGlobalVars;
989 Changed |= GVar.setSectionPrefix("unlikely");
// NOTE(review): this unconditional assignment overrides the result folded in
// on the previous line, whereas the hot branch above relies solely on the
// return value of setSectionPrefix. Looks redundant or inconsistent - confirm
// which behavior is intended.
990 Changed = true;
991 LLVM_DEBUG(dbgs() << "Global variable " << Name
992 << " is annotated as unlikely\n");
993 } else {
994 ++NumOfMemProfUnknownGlobalVars;
995 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
996 }
997 }
998
999 return Changed;
1000}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static std::optional< uint64_t > getStringContentHash(const GlobalVariable &GVar)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > PrintMatchedAllocStack("memprof-print-matched-alloc-stack", cl::desc("Print full stack context for matched " "allocations with -memprof-print-match-info."), cl::Hidden, cl::init(false))
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::vector< CallSiteEntry > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites, OptimizationRemarkEmitter &ORE)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > PrintFunctionGuids("memprof-print-function-guids", cl::desc("Print function GUIDs computed for matching"), cl::Hidden, cl::init(false))
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:78
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
std::variant< StringRef, uint64_t > SymbolHandleRef
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
DiagnosticInfoOptimizationBase::Argument NV
uint64_t MD5Hash(const FunctionId &Obj)
Definition FunctionId.h:167
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
cl::opt< bool > AnnotateStringLiteralSectionPrefix("memprof-annotate-string-literal-section-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the string literal data section prefix"))
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and return them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
std::set< unsigned > MatchedFramesSet
uint64_t TotalSize
std::vector< Frame > CallStack
AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
AllocationType AllocType
ArrayRef< GlobalValue::GUID > CalleeGuids
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520