52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
115 "Number of aliasees prevailing in a different module than its alias");
120 cl::desc(
"Specify the path prefix of the MemProf dot files."));
124 cl::desc(
"Export graph to dot files."));
129 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
139 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
144 "Export only nodes with contexts feeding given "
145 "-memprof-dot-alloc-id"),
147 "Export only nodes with given -memprof-dot-context-id")));
151 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
152 "or to highlight if -memprof-dot-scope=all"));
156 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
157 "highlight otherwise"));
161 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
165 cl::desc(
"Perform verification checks on CallingContextGraph."));
169 cl::desc(
"Perform frequent verification checks on nodes."));
172 "memprof-import-summary",
173 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
179 cl::desc(
"Max depth to recursively search for missing "
180 "frames through tail calls."));
185 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
189 cl::desc(
"Allow cloning of contexts through recursive cycles"));
196 cl::desc(
"Merge clones before assigning functions"));
205 cl::desc(
"Allow cloning of contexts having recursive cycles"));
211 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
222 cl::desc(
"Linking with hot/cold operator new interfaces"));
227 "Require target function definition when promoting indirect calls"));
234 cl::desc(
"Number of largest cold contexts to consider important"));
238 cl::desc(
"Enables edge fixup for important contexts"));
260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
261class CallsiteContextGraph {
263 CallsiteContextGraph() =
default;
264 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
265 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
269 EmitRemark =
nullptr,
270 bool AllowExtraAnalysis =
false);
274 void identifyClones();
281 bool assignFunctions();
287 EmitRemark =
nullptr)
const;
290 const CallsiteContextGraph &CCG) {
296 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
298 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
300 void exportToDot(std::string Label)
const;
303 struct FuncInfo final
304 :
public std::pair<FuncTy *, unsigned > {
305 using Base = std::pair<FuncTy *, unsigned>;
307 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
308 explicit operator bool()
const {
return this->first !=
nullptr; }
309 FuncTy *func()
const {
return this->first; }
310 unsigned cloneNo()
const {
return this->second; }
314 struct CallInfo final :
public std::pair<CallTy, unsigned > {
315 using Base = std::pair<CallTy, unsigned>;
317 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
319 explicit operator bool()
const {
return (
bool)this->first; }
320 CallTy call()
const {
return this->first; }
321 unsigned cloneNo()
const {
return this->second; }
322 void setCloneNo(
unsigned N) { this->second =
N; }
324 if (!
operator bool()) {
330 OS <<
"\t(clone " << cloneNo() <<
")";
356 bool Recursive =
false;
383 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
387 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
391 bool useCallerEdgesForContextInfo()
const {
396 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
414 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
415 Count += Edge->getContextIds().size();
419 CalleeEdges, useCallerEdgesForContextInfo()
421 : std::vector<std::shared_ptr<ContextEdge>>());
422 for (
const auto &Edge : Edges)
429 uint8_t computeAllocType()
const {
434 CalleeEdges, useCallerEdgesForContextInfo()
436 : std::vector<std::shared_ptr<ContextEdge>>());
437 for (
const auto &Edge : Edges) {
448 bool emptyContextIds()
const {
450 CalleeEdges, useCallerEdgesForContextInfo()
452 : std::vector<std::shared_ptr<ContextEdge>>());
453 for (
const auto &Edge : Edges) {
454 if (!Edge->getContextIds().empty())
461 std::vector<ContextNode *> Clones;
464 ContextNode *CloneOf =
nullptr;
466 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
468 ContextNode(
bool IsAllocation, CallInfo
C)
469 : IsAllocation(IsAllocation),
Call(
C) {}
471 void addClone(ContextNode *Clone) {
473 CloneOf->Clones.push_back(Clone);
474 Clone->CloneOf = CloneOf;
476 Clones.push_back(Clone);
478 Clone->CloneOf =
this;
482 ContextNode *getOrigNode() {
489 unsigned int ContextId);
491 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
492 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
493 void eraseCalleeEdge(
const ContextEdge *Edge);
494 void eraseCallerEdge(
const ContextEdge *Edge);
496 void setCall(CallInfo
C) {
Call = std::move(
C); }
498 bool hasCall()
const {
return (
bool)
Call.call(); }
504 bool isRemoved()
const {
540 bool IsBackedge =
false;
547 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
548 ContextIds(std::move(ContextIds)) {}
554 inline void clear() {
564 inline bool isRemoved()
const {
565 if (Callee || Caller)
586 void removeNoneTypeCalleeEdges(ContextNode *
Node);
587 void removeNoneTypeCallerEdges(ContextNode *
Node);
589 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
595 template <
class NodeT,
class IteratorT>
596 std::vector<uint64_t>
601 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
604 template <
class NodeT,
class IteratorT>
605 void addStackNodesForMIB(
609 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
614 void updateStackNodes();
623 void fixupImportantContexts();
627 void handleCallsitesWithMultipleTargets();
630 void markBackedges();
640 bool partitionCallsByCallee(
642 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
649 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
656 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
661 struct CallContextInfo {
665 std::vector<uint64_t> StackIds;
679 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
680 bool CalleeIter =
true);
688 void assignStackNodesPostOrder(
702 void propagateDuplicateContextIds(
708 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
716 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
726 calleesMatch(CallTy
Call, EdgeIter &EI,
731 const FuncTy *getCalleeFunc(CallTy
Call) {
732 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
738 bool calleeMatchesFunc(
739 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
740 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
741 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
742 Call, Func, CallerFunc, FoundCalleeChain);
746 bool sameCallee(CallTy Call1, CallTy Call2) {
747 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
752 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
753 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
759 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
765 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
770 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
775 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
776 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
782 FuncInfo cloneFunctionForCallsite(
784 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
785 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
786 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
791 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
792 unsigned CloneNo)
const {
793 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
797 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
798 CallInfo
C = CallInfo()) {
799 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
800 auto *NewNode = NodeOwner.back().get();
802 NodeToCallingFunc[NewNode] =
F;
803 NewNode->NodeId = NodeOwner.size();
808 ContextNode *getNodeForInst(
const CallInfo &
C);
809 ContextNode *getNodeForAlloc(
const CallInfo &
C);
810 ContextNode *getNodeForStackId(
uint64_t StackId);
832 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
839 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
840 ContextNode *NewCallee,
841 bool NewClone =
false,
849 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
850 ContextNode *NewCaller);
861 void mergeNodeCalleeClones(
866 void findOtherCallersToShareMerge(
867 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
895 struct ImportantContextInfo {
897 std::vector<uint64_t> StackIds;
900 unsigned MaxLength = 0;
904 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
913 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
927 auto Size = StackIds.size();
928 for (
auto Id : Ids) {
929 auto &Entry = ImportantContextIdInfo[Id];
930 Entry.StackIdsToNode[StackIds] =
Node;
932 if (
Size > Entry.MaxLength)
933 Entry.MaxLength =
Size;
942 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
948 unsigned int LastContextId = 0;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
954template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
956 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
957template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
959 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
960template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
962 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
965class ModuleCallsiteContextGraph
966 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
969 ModuleCallsiteContextGraph(
971 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
974 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
977 uint64_t getStackId(uint64_t IdOrIndex)
const;
979 bool calleeMatchesFunc(
980 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
981 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
982 bool sameCallee(Instruction *Call1, Instruction *Call2);
983 bool findProfiledCalleeThroughTailCalls(
984 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
985 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
986 bool &FoundMultipleCalleeChains);
987 uint64_t getLastStackId(Instruction *
Call);
988 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
991 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
992 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
994 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
995 DenseMap<CallInfo, CallInfo> &CallMap,
996 std::vector<CallInfo> &CallsWithMetadataInFunc,
998 std::string getLabel(
const Function *Func,
const Instruction *
Call,
999 unsigned CloneNo)
const;
1002 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1008struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1009 IndexCall() : PointerUnion() {}
1010 IndexCall(std::nullptr_t) : IndexCall() {}
1011 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1012 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1013 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1015 IndexCall *operator->() {
return this; }
1017 void print(raw_ostream &OS)
const {
1018 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1043class IndexCallsiteContextGraph
1044 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1047 IndexCallsiteContextGraph(
1048 ModuleSummaryIndex &Index,
1052 ~IndexCallsiteContextGraph() {
1057 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1059 for (
auto &Callsite :
I.second)
1060 FS->addCallsite(*Callsite.second);
1065 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1068 uint64_t getStackId(uint64_t IdOrIndex)
const;
1069 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1070 bool calleeMatchesFunc(
1071 IndexCall &
Call,
const FunctionSummary *Func,
1072 const FunctionSummary *CallerFunc,
1073 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1074 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1075 bool findProfiledCalleeThroughTailCalls(
1076 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1077 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1078 bool &FoundMultipleCalleeChains);
1079 uint64_t getLastStackId(IndexCall &
Call);
1080 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1083 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1084 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1085 IndexCall>::FuncInfo
1086 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1087 DenseMap<CallInfo, CallInfo> &CallMap,
1088 std::vector<CallInfo> &CallsWithMetadataInFunc,
1090 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1091 unsigned CloneNo)
const;
1092 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1096 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1098 const ModuleSummaryIndex &Index;
1106 std::unordered_map<FunctionSummary *,
1107 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1108 FunctionCalleesToSynthesizedCallsiteInfos;
1119 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1122 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1143template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1144bool allocTypesMatch(
1145 const std::vector<uint8_t> &InAllocTypes,
1146 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1150 assert(InAllocTypes.size() == Edges.size());
1152 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1154 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1158 if (l == (uint8_t)AllocationType::None ||
1159 r->AllocTypes == (uint8_t)AllocationType::None)
1161 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1170template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1171bool allocTypesMatchClone(
1172 const std::vector<uint8_t> &InAllocTypes,
1173 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1174 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1178 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1182 for (
const auto &
E : Clone->CalleeEdges) {
1184 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1188 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1189 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1191 if (Iter == EdgeCalleeMap.
end())
1199 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1207template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1208typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1209CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1210 const CallInfo &
C) {
1211 ContextNode *
Node = getNodeForAlloc(
C);
1215 return NonAllocationCallToContextNodeMap.lookup(
C);
1218template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1219typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1220CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1221 const CallInfo &
C) {
1222 return AllocationCallToContextNodeMap.lookup(
C);
1225template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1226typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1227CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1229 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1230 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1231 return StackEntryNode->second;
1235template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1236void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1238 unsigned int ContextId) {
1239 for (
auto &
Edge : CallerEdges) {
1240 if (
Edge->Caller == Caller) {
1242 Edge->getContextIds().insert(ContextId);
1246 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1247 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1248 CallerEdges.push_back(
Edge);
1252template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1253void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1254 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1270 auto CalleeCallerCount =
Callee->CallerEdges.size();
1271 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1276 }
else if (CalleeIter) {
1278 *EI =
Caller->CalleeEdges.erase(*EI);
1281 *EI =
Callee->CallerEdges.erase(*EI);
1283 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1284 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1287template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1288void CallsiteContextGraph<
1289 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1290 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1292 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1294 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1300template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1301void CallsiteContextGraph<
1302 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1303 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1305 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1307 Edge->Caller->eraseCalleeEdge(
Edge.get());
1308 EI =
Node->CallerEdges.erase(EI);
1314template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1315typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1316CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1317 findEdgeFromCallee(
const ContextNode *Callee) {
1318 for (
const auto &
Edge : CalleeEdges)
1319 if (
Edge->Callee == Callee)
1324template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1325typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1326CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1327 findEdgeFromCaller(
const ContextNode *Caller) {
1328 for (
const auto &
Edge : CallerEdges)
1329 if (
Edge->Caller == Caller)
1334template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1335void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1336 eraseCalleeEdge(
const ContextEdge *
Edge) {
1338 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1339 return CalleeEdge.get() ==
Edge;
1341 assert(EI != CalleeEdges.end());
1342 CalleeEdges.erase(EI);
1345template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1346void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1347 eraseCallerEdge(
const ContextEdge *
Edge) {
1349 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1350 return CallerEdge.get() ==
Edge;
1352 assert(EI != CallerEdges.end());
1353 CallerEdges.erase(EI);
1356template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1357uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1358 DenseSet<uint32_t> &ContextIds)
const {
1360 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1361 uint8_t
AllocType = (uint8_t)AllocationType::None;
1362 for (
auto Id : ContextIds) {
1363 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1371template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1373CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1374 const DenseSet<uint32_t> &Node1Ids,
1375 const DenseSet<uint32_t> &Node2Ids)
const {
1377 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1378 uint8_t
AllocType = (uint8_t)AllocationType::None;
1379 for (
auto Id : Node1Ids) {
1380 if (!Node2Ids.
count(Id))
1382 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1390template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1391uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1392 const DenseSet<uint32_t> &Node1Ids,
1393 const DenseSet<uint32_t> &Node2Ids)
const {
1394 if (Node1Ids.
size() < Node2Ids.
size())
1395 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1397 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1400template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1401typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1402CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1403 CallInfo
Call,
const FuncTy *
F) {
1405 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1406 AllocationCallToContextNodeMap[
Call] = AllocNode;
1408 AllocNode->OrigStackOrAllocId = LastContextId;
1411 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1427template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1428template <
class NodeT,
class IteratorT>
1429void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1430 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1433 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1439 ContextIdToAllocationType[++LastContextId] =
AllocType;
1441 bool IsImportant =
false;
1442 if (!ContextSizeInfo.
empty()) {
1443 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1447 uint64_t TotalCold = 0;
1448 for (
auto &CSI : ContextSizeInfo)
1449 TotalCold += CSI.TotalSize;
1455 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1458 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1459 TotalSizeToContextIdTopNCold.erase(
1460 TotalSizeToContextIdTopNCold.begin());
1461 assert(ImportantContextIdInfo.count(IdToRemove));
1462 ImportantContextIdInfo.erase(IdToRemove);
1464 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1468 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1472 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1477 ContextNode *PrevNode = AllocNode;
1481 SmallSet<uint64_t, 8> StackIdSet;
1484 ContextIter != StackContext.
end(); ++ContextIter) {
1485 auto StackId = getStackId(*ContextIter);
1487 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1488 ContextNode *StackNode = getNodeForStackId(StackId);
1490 StackNode = createNewNode(
false);
1491 StackEntryIdToContextNodeMap[StackId] = StackNode;
1492 StackNode->OrigStackOrAllocId = StackId;
1497 auto Ins = StackIdSet.
insert(StackId);
1499 StackNode->Recursive =
true;
1501 StackNode->AllocTypes |= (uint8_t)
AllocType;
1502 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1503 PrevNode = StackNode;
1507template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1509CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1510 const DenseSet<uint32_t> &StackSequenceContextIds,
1511 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1512 DenseSet<uint32_t> NewContextIds;
1513 for (
auto OldId : StackSequenceContextIds) {
1514 NewContextIds.
insert(++LastContextId);
1515 OldToNewContextIds[OldId].insert(LastContextId);
1516 assert(ContextIdToAllocationType.count(OldId));
1518 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1519 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1520 if (CSI != ContextIdToContextSizeInfos.end())
1521 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1522 if (DotAllocContextIds.
contains(OldId))
1523 DotAllocContextIds.
insert(LastContextId);
1525 return NewContextIds;
1528template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1529void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1530 propagateDuplicateContextIds(
1531 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1533 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1534 DenseSet<uint32_t> NewIds;
1535 for (
auto Id : ContextIds)
1536 if (
auto NewId = OldToNewContextIds.find(Id);
1537 NewId != OldToNewContextIds.end())
1543 auto UpdateCallers = [&](ContextNode *
Node,
1544 DenseSet<const ContextEdge *> &Visited,
1545 auto &&UpdateCallers) ->
void {
1546 for (
const auto &
Edge :
Node->CallerEdges) {
1550 ContextNode *NextNode =
Edge->Caller;
1551 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1554 if (!NewIdsToAdd.
empty()) {
1555 Edge->getContextIds().insert_range(NewIdsToAdd);
1556 UpdateCallers(NextNode, Visited, UpdateCallers);
1561 DenseSet<const ContextEdge *> Visited;
1562 for (
auto &Entry : AllocationCallToContextNodeMap) {
1564 UpdateCallers(Node, Visited, UpdateCallers);
1568template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1569void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1570 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1573 DenseSet<uint32_t> RemainingContextIds) {
1575 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1576 DenseSet<uint32_t> RecursiveContextIds;
1577 DenseSet<uint32_t> AllCallerContextIds;
1582 for (
auto &CE : OrigEdges) {
1583 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1584 for (
auto Id :
CE->getContextIds())
1585 if (!AllCallerContextIds.
insert(Id).second)
1586 RecursiveContextIds.
insert(Id);
1590 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1592 DenseSet<uint32_t> NewEdgeContextIds;
1593 DenseSet<uint32_t> NotFoundContextIds;
1597 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1598 NotFoundContextIds);
1601 if (RecursiveContextIds.
empty()) {
1604 RemainingContextIds.
swap(NotFoundContextIds);
1614 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1616 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1619 if (NewEdgeContextIds.
empty()) {
1623 if (TowardsCallee) {
1624 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1625 auto NewEdge = std::make_shared<ContextEdge>(
1626 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1627 NewNode->CalleeEdges.push_back(NewEdge);
1628 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1630 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1631 auto NewEdge = std::make_shared<ContextEdge>(
1632 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1633 NewNode->CallerEdges.push_back(NewEdge);
1634 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1637 if (
Edge->getContextIds().empty()) {
1638 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1645template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1647 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1651 assert(!Edge->ContextIds.empty());
1654template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1656 bool CheckEdges =
true) {
1657 if (
Node->isRemoved())
1661 auto NodeContextIds =
Node->getContextIds();
1665 if (
Node->CallerEdges.size()) {
1667 Node->CallerEdges.front()->ContextIds);
1671 set_union(CallerEdgeContextIds, Edge->ContextIds);
1678 NodeContextIds == CallerEdgeContextIds ||
1681 if (
Node->CalleeEdges.size()) {
1683 Node->CalleeEdges.front()->ContextIds);
1687 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1693 NodeContextIds == CalleeEdgeContextIds);
1702 for (
const auto &
E :
Node->CalleeEdges)
1708template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1709void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1710 assignStackNodesPostOrder(ContextNode *Node,
1711 DenseSet<const ContextNode *> &Visited,
1712 DenseMap<uint64_t, std::vector<CallContextInfo>>
1713 &StackIdToMatchingCalls,
1714 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1715 const DenseSet<uint32_t> &ImportantContextIds) {
1723 auto CallerEdges =
Node->CallerEdges;
1724 for (
auto &
Edge : CallerEdges) {
1726 if (
Edge->isRemoved()) {
1730 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1731 CallToMatchingCall, ImportantContextIds);
1740 if (
Node->IsAllocation ||
1741 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1744 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1748 if (Calls.size() == 1) {
1749 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1750 if (Ids.size() == 1) {
1751 assert(SavedContextIds.empty());
1753 assert(Node == getNodeForStackId(Ids[0]));
1754 if (
Node->Recursive)
1757 NonAllocationCallToContextNodeMap[
Call] =
Node;
1759 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1767 uint64_t LastId =
Node->OrigStackOrAllocId;
1768 ContextNode *LastNode = getNodeForStackId(LastId);
1771 assert(LastNode == Node);
1773 ContextNode *LastNode =
Node;
1778 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1780 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1781 bool CreatedNode =
false;
1782 for (
unsigned I = 0;
I < Calls.size();
1783 I++, PrevIterCreatedNode = CreatedNode) {
1784 CreatedNode =
false;
1785 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1788 if (SavedContextIds.empty()) {
1795 auto MatchingCall = CallToMatchingCall[
Call];
1796 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1800 assert(
I > 0 && !PrevIterCreatedNode);
1803 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1808 assert(LastId == Ids.back());
1817 ContextNode *PrevNode = LastNode;
1821 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1823 ContextNode *CurNode = getNodeForStackId(Id);
1827 assert(!CurNode->Recursive);
1829 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1841 if (SavedContextIds.empty()) {
1850 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1851 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1853 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1855 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1861 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1866 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1871 for (
auto Id : Ids) {
1872 ContextNode *CurNode = getNodeForStackId(Id);
1879 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1886 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1887 if (PrevEdge->getContextIds().empty())
1888 removeEdgeFromGraph(PrevEdge);
1893 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1894 ? (uint8_t)AllocationType::None
1895 : CurNode->computeAllocType();
1899 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1903 for (
auto Id : Ids) {
1904 ContextNode *CurNode = getNodeForStackId(Id);
// Re-attach "important" context ids along their recorded chains of stack
// nodes. For each important context, walks its saved stack ids, looks up the
// graph node recorded for each matching stack-id slice, and ensures the edge
// between consecutive nodes carries the context id (creating a new edge when
// necessary). Edges touched for this context are collected in VisitedEdges so
// the id can then be erased from the previous node's other caller edges.
// NOTE(review): this extract elides many original source lines; code is kept
// byte-identical with comments only added, and control flow between visible
// statements is partial.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1914void CallsiteContextGraph<DerivedCCG, FuncTy,
1915 CallTy>::fixupImportantContexts() {
// Nothing to fix up if no contexts were marked important.
1916 if (ImportantContextIdInfo.empty())
1920 NumImportantContextIds = ImportantContextIdInfo.size();
1926 exportToDot(
"beforestackfixup");
// One pass per important context id and its recorded stack info.
1951 for (
auto &[CurContextId, Info] : ImportantContextIdInfo) {
1952 if (
Info.StackIdsToNode.empty())
1955 ContextNode *PrevNode =
nullptr;
1956 ContextNode *CurNode =
nullptr;
1957 DenseSet<const ContextEdge *> VisitedEdges;
1958 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
// Walk the saved stack ids; PrevNode trails CurNode by one iteration.
1961 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1965 auto LenToEnd = AllStackIds.size() -
I;
// Try to find the node recorded for this slice of stack ids.
1973 auto CheckStackIds = AllStackIds.slice(
I, Len);
1974 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1975 if (EntryIt ==
Info.StackIdsToNode.end())
1977 CurNode = EntryIt->second;
// Ensure the caller edge between the consecutive nodes carries this
// context id, adding the id (or a whole new edge) as needed.
1994 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1997 if (CurEdge->getContextIds().insert(CurContextId).second) {
1998 NumFixupEdgeIdsInserted++;
// No existing edge: synthesize one carrying just this context id.
2003 NumFixupEdgesAdded++;
2004 DenseSet<uint32_t> ContextIds({CurContextId});
2005 auto AllocType = computeAllocType(ContextIds);
2006 auto NewEdge = std::make_shared<ContextEdge>(
2007 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2008 PrevNode->CallerEdges.push_back(NewEdge);
2009 CurNode->CalleeEdges.push_back(NewEdge);
2011 CurEdge = NewEdge.get();
2014 VisitedEdges.
insert(CurEdge);
// Remove the context id from all other caller edges of PrevNode so it
// only flows along the fixed-up chain (visited-edge check elided here).
2017 for (
auto &
Edge : PrevNode->CallerEdges) {
2021 Edge->getContextIds().erase(CurContextId);
// Match profiled callsites (which may cover sequences of inlined frames) to
// chains of stack nodes in the graph, computing for each call the set of
// context ids ("SavedContextIds") its eventual node should carry. Calls with
// the same stack ids but in different functions share one profiled context,
// so their context ids are duplicated; assignStackNodesPostOrder then
// materializes the combined nodes bottom-up.
// NOTE(review): many original lines are elided in this extract; the visible
// statements are byte-identical to the original.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2030void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// Bucket all non-allocation calls with metadata by their last stack id.
2038 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2039 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2040 for (
auto &
Call : CallsWithMetadata) {
// Allocation calls already have nodes; skip them.
2042 if (AllocationCallToContextNodeMap.count(
Call))
2044 auto StackIdsWithContextNodes =
2045 getStackIdsWithContextNodesForCall(
Call.call());
2048 if (StackIdsWithContextNodes.empty())
2052 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2053 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
// Maps original context ids to the new ids duplicated for calls in other
// functions sharing the same stack ids.
2063 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2067 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2068 for (
auto &It : StackIdToMatchingCalls) {
2069 auto &Calls = It.getSecond();
// Single call with a single stack id is handled directly in the
// post-order pass; nothing to precompute here.
2071 if (Calls.size() == 1) {
2072 auto &Ids = Calls[0].StackIds;
2073 if (Ids.size() == 1)
// Sort calls by decreasing stack-id sequence length, then by ids, then
// by original (function) order, so longer sequences are matched first
// and the order is deterministic.
2086 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2087 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2088 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2091 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2092 return A.StackIds.size() >
B.StackIds.size() ||
2093 (
A.StackIds.size() ==
B.StackIds.size() &&
2094 (
A.StackIds <
B.StackIds ||
2095 (
A.StackIds ==
B.StackIds &&
2096 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
// All calls in this bucket share the same last stack id / node.
2102 uint64_t LastId = It.getFirst();
2103 ContextNode *LastNode = getNodeForStackId(LastId);
2107 if (LastNode->Recursive)
2112 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2120 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2123 for (
unsigned I = 0;
I < Calls.size();
I++) {
2124 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2125 assert(SavedContextIds.empty());
2126 assert(LastId == Ids.back());
// Starting a new stack-id sequence: reset the per-sequence func set.
2131 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2132 MatchingIdsFuncSet.
clear();
// Intersect context ids along the chain of stack nodes for this call's
// stack-id sequence, walking from the last id backwards.
2139 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2141 ContextNode *PrevNode = LastNode;
2142 ContextNode *CurNode = LastNode;
2147 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2149 CurNode = getNodeForStackId(Id);
2153 if (CurNode->Recursive) {
2158 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2179 if (StackSequenceContextIds.
empty()) {
// If this call doesn't end at the deepest frame, exclude ids that
// continue past LastNode via its caller edges.
2192 if (Ids.back() != getLastStackId(
Call)) {
2193 for (
const auto &PE : LastNode->CallerEdges) {
2194 set_subtract(StackSequenceContextIds, PE->getContextIds());
2195 if (StackSequenceContextIds.
empty())
2199 if (StackSequenceContextIds.
empty())
2211 MatchingIdsFuncSet.
insert(Func);
// Detect whether a later call in a different function shares this exact
// stack-id sequence; those share the profiled context and need
// duplicated context ids plus a matching-call record.
2218 bool DuplicateContextIds =
false;
2219 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2220 auto &CallCtxInfo = Calls[J];
2221 auto &NextIds = CallCtxInfo.StackIds;
2224 auto *NextFunc = CallCtxInfo.Func;
2225 if (NextFunc != Func) {
2228 DuplicateContextIds =
true;
2231 auto &NextCall = CallCtxInfo.Call;
2232 CallToMatchingCall[NextCall] =
Call;
2243 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2244 StackSequenceContextIds.
size());
// Save either duplicated ids (shared sequence) or the intersection.
2247 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2248 : StackSequenceContextIds;
2249 assert(!SavedContextIds.empty());
2251 if (!DuplicateContextIds) {
// Claimed ids are removed so later calls don't also take them.
2255 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2256 if (LastNodeContextIds.
empty())
// Propagate the duplicated ids through the rest of the graph, then build
// the new nodes bottom-up from each allocation.
2263 propagateDuplicateContextIds(OldToNewContextIds);
2273 DenseSet<const ContextNode *> Visited;
2275 ImportantContextIdInfo.keys());
2276 for (
auto &Entry : AllocationCallToContextNodeMap)
2277 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2278 CallToMatchingCall, ImportantContextIds);
2280 fixupImportantContexts();
// Returns the last (deepest) stack id from the instruction's callsite
// metadata context. (The CallsiteContext constructor argument is elided in
// this extract; presumably the MD_callsite metadata — TODO confirm.)
2286uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2287 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2289 return CallsiteContext.
back();
// Returns the last stack id for a summary-index callsite. Note the summary
// stores indices into the index's stack-id table, so the back() value is
// translated via Index.getStackIdAtIndex.
2292uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2294 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2297 return Index.getStackIdAtIndex(CallsiteContext.
back());
// NOTE(review): interior fragment — the enclosing function signature was
// elided by the extraction. The visible code parses the numeric suffix after
// the final '.' in the function name (the memprof clone number) into CloneNo.
2319 auto Pos =
F.getName().find_last_of(
'.');
2322 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
// Builds a human-readable label for a call (used in DOT output / debugging).
// Body elided in this extract; only the signature is visible.
2328std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2329 const Instruction *
Call,
2330 unsigned CloneNo)
const {
// Builds a label for a summary-index call: "<caller> -> alloc" for
// allocations, otherwise "<caller> -> <callee clone>". The caller name is
// obtained via the FunctionSummary-to-ValueInfo map (FSToVIMap).
2336std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2337 const IndexCall &
Call,
2338 unsigned CloneNo)
const {
2339 auto VI = FSToVIMap.find(Func);
2340 assert(VI != FSToVIMap.end());
// Allocation case.
2343 return CallerName +
" -> alloc";
// Callsite case: append the target for the requested clone number.
2346 return CallerName +
" -> " +
2348 Callsite->Clones[CloneNo]);
// Returns the stack ids from this instruction's callsite metadata for which
// context nodes already exist, by delegating to the templated
// getStackIdsWithContextNodes helper.
2352std::vector<uint64_t>
2353ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2354 Instruction *
Call) {
2355 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2357 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
// Summary-index variant of getStackIdsWithContextNodesForCall; iterates the
// CallsiteInfo's stack-id indices via the templated helper.
2361std::vector<uint64_t>
2362IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2364 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2366 return getStackIdsWithContextNodes<CallsiteInfo,
2367 SmallVector<unsigned>::const_iterator>(
// Walks the callsite context from innermost frame outward, translating each
// entry to a stack id (getStackId handles the module vs index difference)
// and collecting ids as long as a context node exists for them. (The
// early-exit when no node is found is elided in this extract.)
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2372template <
class NodeT,
class IteratorT>
2373std::vector<uint64_t>
2374CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2375 CallStack<NodeT, IteratorT> &CallsiteContext) {
2376 std::vector<uint64_t> StackIds;
2377 for (
auto IdOrIndex : CallsiteContext) {
2378 auto StackId = getStackId(IdOrIndex);
2379 ContextNode *
Node = getNodeForStackId(StackId);
2382 StackIds.push_back(StackId);
// Builds the callsite context graph from IR: scans every instruction for
// MD_memprof (allocations) and MD_callsite metadata, creates allocation
// nodes and stack nodes from the MIB operands, then matches stack nodes to
// calls and strips the consumed metadata.
// NOTE(review): several original lines (loop setup, MIB decoding, graph
// update calls) are elided in this extract.
2387ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2389 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2390 :
Mod(
M), OREGetter(OREGetter) {
// Tracks the N largest cold contexts by total size (for importance marking).
2394 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2396 std::vector<CallInfo> CallsWithMetadata;
2397 for (
auto &BB :
F) {
2398 for (
auto &
I : BB) {
// Allocation with memprof metadata: create an allocation node and add
// stack nodes for each MIB (allocation context).
2401 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2402 CallsWithMetadata.push_back(&
I);
2403 auto *AllocNode = addAllocNode(&
I, &
F);
2404 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2408 for (
auto &MDOp : MemProfMD->operands()) {
// Optional context-size info follows the first two MIB operands.
2410 std::vector<ContextTotalSize> ContextSizeInfo;
2412 if (MIBMD->getNumOperands() > 2) {
2413 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2414 MDNode *ContextSizePair =
2423 ContextSizeInfo.push_back({FullStackId, TotalSize});
2429 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2430 AllocNode, StackContext, CallsiteContext,
2432 TotalSizeToContextIdTopNCold);
2437 DotAllocContextIds = AllocNode->getContextIds();
// Metadata is consumed: drop it so later passes don't reprocess it.
2441 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2442 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
// Pure callsite metadata (no allocation): just record the call.
2445 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2446 CallsWithMetadata.push_back(&
I);
2450 if (!CallsWithMetadata.empty())
2451 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2455 dbgs() <<
"CCG before updating call stack chains:\n";
2460 exportToDot(
"prestackupdate");
2465 exportToDot(
"poststackupdate");
2467 handleCallsitesWithMultipleTargets();
// Remove the now-consumed callsite metadata from all recorded calls.
2472 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2473 for (
auto &
Call : FuncEntry.second)
2474 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Collects GUIDs of aliasees whose prevailing copy lives in a different
// module than a prevailing alias referencing them; such functions are
// handled specially by the index-based graph. Also bumps the corresponding
// statistic. (Return type line and some guards are elided in this extract.)
2480IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2481 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2482 for (
auto &
I : Index) {
2484 for (
auto &S :
VI.getSummaryList()) {
2489 auto *AliaseeSummary = &AS->getAliasee();
// Only consider prevailing alias summaries.
2497 !isPrevailing(
VI.getGUID(), S.get()))
2502 auto AliaseeGUID = AS->getAliaseeGUID();
// Record the aliasee if its own prevailing copy is elsewhere.
2504 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2505 AliaseeGUIDs.
insert(AliaseeGUID);
2508 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2509 return AliaseeGUIDs;
// Builds the callsite context graph from the ThinLTO summary index: for each
// prevailing function summary, creates allocation nodes from its alloc
// summaries (one stack-node chain per MIB) and records callsite summaries,
// then matches stack nodes to calls.
// NOTE(review): many original lines (prevailing checks, FS extraction,
// updateStackNodes call) are elided in this extract.
2512IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2513 ModuleSummaryIndex &Index,
// Functions whose aliasee prevails in a different module are skipped.
2523 findAliaseeGUIDsPrevailingInDifferentModule();
2527 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2528 for (
auto &
I : Index) {
2529 auto VI = Index.getValueInfo(
I);
2530 if (GUIDsToSkip.
contains(VI.getGUID()))
2532 for (
auto &S : VI.getSummaryList()) {
// Only prevailing summaries contribute to the graph.
2541 !isPrevailing(VI.getGUID(), S.get()))
2546 std::vector<CallInfo> CallsWithMetadata;
2547 if (!
FS->allocs().empty()) {
2548 for (
auto &AN :
FS->mutableAllocs()) {
2553 if (AN.MIBs.empty())
2555 IndexCall AllocCall(&AN);
2556 CallsWithMetadata.push_back(AllocCall);
2557 auto *AllocNode = addAllocNode(AllocCall, FS);
// Context size info, when present, is parallel to the MIB list.
2565 AN.ContextSizeInfos.size() == AN.MIBs.size());
2567 for (
auto &MIB : AN.MIBs) {
2570 std::vector<ContextTotalSize> ContextSizeInfo;
2571 if (!AN.ContextSizeInfos.empty()) {
2572 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2573 ContextSizeInfo.push_back({FullStackId, TotalSize});
2575 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2576 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2577 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2583 DotAllocContextIds = AllocNode->getContextIds();
// Initialize the single (unversioned) allocation type in the summary.
2589 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2593 if (!
FS->callsites().empty())
2594 for (
auto &SN :
FS->mutableCallsites()) {
2595 IndexCall StackNodeCall(&SN);
2596 CallsWithMetadata.push_back(StackNodeCall);
2599 if (!CallsWithMetadata.empty())
2600 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2602 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2608 dbgs() <<
"CCG before updating call stack chains:\n";
2613 exportToDot(
"prestackupdate");
2618 exportToDot(
"poststackupdate");
2620 handleCallsitesWithMultipleTargets();
// Resolves callsite nodes whose profiled callees don't match their IR/summary
// callees (e.g. due to inlining or indirect calls). Calls are partitioned by
// callee where possible, remaining calls are matched against callee edges
// (possibly via tail-call chains), unmatched nodes are cleared, and new
// nodes created for tail calls or partitions are registered in the map.
// NOTE(review): elided lines include loop bodies, edge-iterator increments
// and break/continue control flow.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2626void CallsiteContextGraph<DerivedCCG, FuncTy,
2627 CallTy>::handleCallsitesWithMultipleTargets() {
2642 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2643 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2644 auto *
Node = Entry.second;
// Consider the node's primary call plus all its matching calls.
2653 std::vector<CallInfo> AllCalls;
2654 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2655 AllCalls.push_back(
Node->Call);
// If calls could be split cleanly by callee, this node is fully handled.
2669 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
// Otherwise find the first call whose callee matches one of the node's
// profiled callee edges.
2672 auto It = AllCalls.begin();
2674 for (; It != AllCalls.end(); ++It) {
2677 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2680 if (!Edge->Callee->hasCall())
2682 assert(NodeToCallingFunc.count(Edge->Callee));
2684 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// A later matching call becomes the node's primary call.
2693 if (
Node->Call != ThisCall) {
2694 Node->setCall(ThisCall);
2705 Node->MatchingCalls.clear();
// No call matched: clear the node's call and count the mismatch.
2708 if (It == AllCalls.end()) {
2709 RemovedEdgesWithMismatchedCallees++;
2713 Node->setCall(CallInfo());
// Remaining (matching) calls are re-recorded on the node.
2718 for (++It; It != AllCalls.end(); ++It) {
2722 Node->MatchingCalls.push_back(ThisCall);
// Drop map entries whose node no longer has (this) call, then register
// nodes created during partitioning and tail-call matching.
2731 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2732 return !it.second->hasCall() || it.second->Call != it.first;
2736 for (
auto &[
Call,
Node] : NewCallToNode)
2737 NonAllocationCallToContextNodeMap[
Call] =
Node;
2741 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2742 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Attempts to split a callsite node whose calls target different callees:
// calls are grouped by their IR/summary callee, groups are matched to the
// node's profiled callee edges, and each matched group gets its own node
// (the first reuses the original node). Edges to unmatched callees move to a
// shared "unmatched" node. Returns via the elided paths whether the
// partitioning was performed.
// NOTE(review): elided lines include the signature's first parameters, map
// declarations (CalleeFuncToCallInfo / CalleeNodeToCallInfo) and several
// control-flow statements.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2746bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2748 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// Group of calls sharing one callee, plus the node assigned to the group.
2752 struct CallsWithSameCallee {
2753 std::vector<CallInfo> Calls;
2754 ContextNode *
Node =
nullptr;
// Bucket the calls by their statically-known callee.
2760 for (
auto ThisCall : AllCalls) {
2761 auto *
F = getCalleeFunc(
ThisCall.call());
2763 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Match profiled callee edges to the call groups via the callee function.
2772 for (
const auto &Edge :
Node->CalleeEdges) {
2773 if (!Edge->Callee->hasCall())
2775 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2776 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2777 CalleeNodeToCallInfo[Edge->Callee] =
2778 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// No profiled callee matched any call group: nothing to partition.
2784 if (CalleeNodeToCallInfo.
empty())
2796 ContextNode *UnmatchedCalleesNode =
nullptr;
// The original node is reused for the first matched group.
2798 bool UsedOrigNode =
false;
// Copy since moveCalleeEdgeToNewCaller mutates Node->CalleeEdges.
2803 auto CalleeEdges =
Node->CalleeEdges;
2804 for (
auto &Edge : CalleeEdges) {
2805 if (!Edge->Callee->hasCall())
2810 ContextNode *CallerNodeToUse =
nullptr;
// Edge's callee matched no call group: route it to the shared
// unmatched-callees node (created lazily).
2814 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2815 if (!UnmatchedCalleesNode)
2816 UnmatchedCalleesNode =
2817 createNewNode(
false, NodeToCallingFunc[
Node]);
2818 CallerNodeToUse = UnmatchedCalleesNode;
2822 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2825 if (!UsedOrigNode) {
2828 Node->MatchingCalls.clear();
2829 UsedOrigNode =
true;
2832 createNewNode(
false, NodeToCallingFunc[
Node]);
2833 assert(!Info->Calls.empty());
2836 Info->Node->setCall(Info->Calls.front());
// New node's call needs a map entry added by the caller.
2842 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2844 CallerNodeToUse = Info->Node;
2848 if (CallerNodeToUse ==
Node)
2851 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
// Clean up edges left with AllocationType::None after the moves.
2858 for (
auto &
I : CalleeNodeToCallInfo)
2859 removeNoneTypeCallerEdges(
I.second->Node);
2860 if (UnmatchedCalleesNode)
2861 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2862 removeNoneTypeCallerEdges(
Node);
// Summary-index stack entries are indices into the index's stack-id table;
// translate the index to the actual 64-bit stack id.
2872uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2875 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR/summary callee of Call matches the profiled callee
// of the edge at EI, possibly through a chain of tail calls. On a tail-call
// match, synthesizes nodes for each discovered intermediate call, re-wires
// edges through them (merging into existing edges where present), and
// removes the original direct edge. EI is kept valid across mutation of the
// caller's CalleeEdges vector.
// NOTE(review): the AddEdge lambda definition and several statements are
// elided in this extract.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2879bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2880 CallTy
Call, EdgeIter &EI,
2881 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2883 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2884 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
// Chain of (call, function) pairs found while walking tail calls.
2887 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2888 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (empty chain): nothing to rewrite.
2893 if (FoundCalleeChain.empty())
// Merge into an existing edge between the pair if there is one, otherwise
// create a new edge carrying the original edge's ids and alloc types.
2897 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2901 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2902 CurEdge->AllocTypes |=
Edge->AllocTypes;
2907 auto NewEdge = std::make_shared<ContextEdge>(
2908 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2909 Callee->CallerEdges.push_back(NewEdge);
2910 if (Caller ==
Edge->Caller) {
// Insert before EI so the caller's iterator stays positioned correctly.
2914 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2917 "Iterator position not restored after insert and increment");
2919 Caller->CalleeEdges.push_back(NewEdge);
// Create (or reuse) a node per tail call in the chain, linking each to the
// previous callee; nodes are recorded in TailCallToContextNodeMap.
2924 auto *CurCalleeNode =
Edge->Callee;
2925 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2926 ContextNode *NewNode =
nullptr;
2928 if (TailCallToContextNodeMap.
count(NewCall)) {
2929 NewNode = TailCallToContextNodeMap[NewCall];
2930 NewNode->AllocTypes |=
Edge->AllocTypes;
2932 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2934 NewNode = createNewNode(
false, Func, NewCall);
2935 TailCallToContextNodeMap[NewCall] = NewNode;
2936 NewNode->AllocTypes =
Edge->AllocTypes;
2940 AddEdge(NewNode, CurCalleeNode);
2942 CurCalleeNode = NewNode;
// Connect the original caller to the top of the new chain, then drop the
// now-replaced direct edge (updating EI).
2946 AddEdge(
Edge->Caller, CurCalleeNode);
2954 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Recursively searches from CurCallee for a unique chain of tail calls
// reaching ProfiledCallee (bounded by a depth limit elided here). Returns
// true and fills FoundCalleeChain when exactly one chain exists; sets
// FoundMultipleCalleeChains when more than one is found. Also updates the
// tail-call discovery statistics.
2966bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2967 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2968 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2969 bool &FoundMultipleCalleeChains) {
// Helper (lambda, header elided): records a discovered chain element.
2976 FoundCalleeChain.push_back({Callsite,
F});
2991 bool FoundSingleCalleeChain =
false;
// Scan every tail call in the candidate callee's body.
2992 for (
auto &BB : *CalleeFunc) {
2993 for (
auto &
I : BB) {
2995 if (!CB || !CB->isTailCall())
2997 auto *CalledValue = CB->getCalledOperand();
2998 auto *CalledFunction = CB->getCalledFunction();
// Look through bitcasts (and aliases, handled in elided lines) to the
// underlying callee function.
2999 if (CalledValue && !CalledFunction) {
3000 CalledValue = CalledValue->stripPointerCasts();
3007 assert(!CalledFunction &&
3008 "Expected null called function in callsite for alias");
3011 if (!CalledFunction)
// Direct tail call to the profiled callee: a chain terminates here.
3013 if (CalledFunction == ProfiledCallee) {
3014 if (FoundSingleCalleeChain) {
3015 FoundMultipleCalleeChains =
true;
3018 FoundSingleCalleeChain =
true;
3019 FoundProfiledCalleeCount++;
3020 FoundProfiledCalleeDepth +=
Depth;
3021 if (
Depth > FoundProfiledCalleeMaxDepth)
3022 FoundProfiledCalleeMaxDepth =
Depth;
3023 SaveCallsiteInfo(&
I, CalleeFunc);
3024 }
// Otherwise recurse through this tail call one level deeper.
else if (findProfiledCalleeThroughTailCalls(
3025 ProfiledCallee, CalledFunction,
Depth + 1,
3026 FoundCalleeChain, FoundMultipleCalleeChains)) {
3029 assert(!FoundMultipleCalleeChains);
3030 if (FoundSingleCalleeChain) {
3031 FoundMultipleCalleeChains =
true;
3034 FoundSingleCalleeChain =
true;
3035 SaveCallsiteInfo(&
I, CalleeFunc);
3036 }
else if (FoundMultipleCalleeChains)
3041 return FoundSingleCalleeChain;
// Returns the statically-known callee of a call instruction (null path for
// indirect calls; the cast of Call to CallBase and final return are elided).
3044const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3046 if (!CB->getCalledOperand() || CB->isIndirectCall())
3048 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Returns whether Call's static callee is Func — directly, via an alias, or
// through a unique tail-call chain (filling FoundCalleeChain in that case).
// Indirect calls never match. Debug output explains failed matches.
3055bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3056 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3057 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3059 if (!CB->getCalledOperand() || CB->isIndirectCall())
3061 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Direct match, or match through an alias to Func.
3063 if (CalleeFunc == Func)
3066 if (Alias && Alias->getAliasee() == Func)
// Fall back to searching for a unique chain of tail calls.
3077 bool FoundMultipleCalleeChains =
false;
3078 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3080 FoundMultipleCalleeChains)) {
3081 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3082 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3083 <<
" that actually called " << CalleeVal->getName()
3084 << (FoundMultipleCalleeChains
3085 ?
" (found multiple possible chains)"
3088 if (FoundMultipleCalleeChains)
3089 FoundProfiledCalleeNonUniquelyCount++;
// Returns whether two call instructions have the same statically-known
// callee; indirect calls compare unequal (early-return lines elided).
3096bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3097 Instruction *Call2) {
3099 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3101 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3104 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3106 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3108 return CalleeFunc1 == CalleeFunc2;
// Summary-index analogue of the module-level tail-call search: walks the
// call edges (with hasTailCall flag) in CurCallee's prevailing summary
// looking for a unique chain reaching ProfiledCallee. Synthesized
// CallsiteInfo records are cached per (function, callee) so repeated
// searches reuse them, and FSToVIMap is kept up to date.
3111bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3112 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3113 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3114 bool &FoundMultipleCalleeChains) {
// Creates (or reuses) a synthesized CallsiteInfo for this tail call and
// appends it to the chain being built.
3120 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3123 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3124 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3127 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3128 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3129 CallsiteInfo *NewCallsiteInfo =
3130 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3131 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3138 bool FoundSingleCalleeChain =
false;
// Only prevailing summaries are searched.
3141 !isPrevailing(CurCallee.
getGUID(), S.get()))
// Resolve aliases to the aliasee's ValueInfo before mapping.
3146 auto FSVI = CurCallee;
3149 FSVI = AS->getAliaseeVI();
3150 for (
auto &CallEdge :
FS->calls()) {
3151 if (!CallEdge.second.hasTailCall())
// Tail call directly to the profiled callee terminates a chain.
3153 if (CallEdge.first == ProfiledCallee) {
3154 if (FoundSingleCalleeChain) {
3155 FoundMultipleCalleeChains =
true;
3158 FoundSingleCalleeChain =
true;
3159 FoundProfiledCalleeCount++;
3160 FoundProfiledCalleeDepth +=
Depth;
3161 if (
Depth > FoundProfiledCalleeMaxDepth)
3162 FoundProfiledCalleeMaxDepth =
Depth;
3163 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3165 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3166 FSToVIMap[
FS] = FSVI;
3167 }
// Otherwise recurse through this tail-call edge one level deeper.
else if (findProfiledCalleeThroughTailCalls(
3168 ProfiledCallee, CallEdge.first,
Depth + 1,
3169 FoundCalleeChain, FoundMultipleCalleeChains)) {
3172 assert(!FoundMultipleCalleeChains);
3173 if (FoundSingleCalleeChain) {
3174 FoundMultipleCalleeChains =
true;
3177 FoundSingleCalleeChain =
true;
3178 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3180 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3181 FSToVIMap[
FS] = FSVI;
3182 }
else if (FoundMultipleCalleeChains)
3187 return FoundSingleCalleeChain;
// Returns the prevailing FunctionSummary for the call's callee ValueInfo;
// null path when the callee has no summaries (extraction of the callee and
// final return are elided).
3190const FunctionSummary *
3191IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3193 if (
Callee.getSummaryList().empty())
// Summary-index analogue of calleeMatchesFunc: matches the callsite's callee
// ValueInfo against Func's ValueInfo (directly, via an alias summary, or
// through a unique tail-call chain). Debug output explains failed matches.
3198bool IndexCallsiteContextGraph::calleeMatchesFunc(
3199 IndexCall &
Call,
const FunctionSummary *Func,
3200 const FunctionSummary *CallerFunc,
3201 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
// If the callee resolves to an alias, compare against its aliasee too.
3205 AliasSummary *Alias =
3206 Callee.getSummaryList().empty()
3209 assert(FSToVIMap.count(Func));
3210 auto FuncVI = FSToVIMap[
Func];
3211 if (Callee == FuncVI ||
// Fall back to searching for a unique chain of tail calls.
3226 bool FoundMultipleCalleeChains =
false;
3227 if (!findProfiledCalleeThroughTailCalls(
3228 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3229 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3230 <<
" from " << FSToVIMap[CallerFunc]
3231 <<
" that actually called " << Callee
3232 << (FoundMultipleCalleeChains
3233 ?
" (found multiple possible chains)"
3236 if (FoundMultipleCalleeChains)
3237 FoundProfiledCalleeNonUniquelyCount++;
// Returns whether two summary callsites resolve to the same callee
// (extraction of Callee1/Callee2 is elided in this extract).
3244bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3247 return Callee1 == Callee2;
// Debug dump of a context node (body elided; presumably prints to dbgs()
// via print() — TODO confirm against full source).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3251void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Pretty-prints a context node: address, flags, matching calls, node id,
// sorted context ids, callee/caller edges (with peer node ids), and clone
// relationships. Context ids are sorted for deterministic output.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3258void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3259 raw_ostream &OS)
const {
3260 OS <<
"Node " <<
this <<
"\n";
3264 OS <<
" (recursive)";
3266 if (!MatchingCalls.empty()) {
3267 OS <<
"\tMatchingCalls:\n";
3268 for (
auto &MatchingCall : MatchingCalls) {
3270 MatchingCall.print(OS);
3274 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3276 OS <<
"\tContextIds:";
// Sort ids so output is stable across runs.
3278 auto ContextIds = getContextIds();
3279 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3280 std::sort(SortedIds.begin(), SortedIds.end());
3281 for (
auto Id : SortedIds)
3284 OS <<
"\tCalleeEdges:\n";
3285 for (
auto &
Edge : CalleeEdges)
3286 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3288 OS <<
"\tCallerEdges:\n";
3289 for (
auto &
Edge : CallerEdges)
3290 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
// Either list this node's clones, or name the node it was cloned from.
3292 if (!Clones.empty()) {
3295 for (
auto *
C : Clones)
3296 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3298 }
else if (CloneOf) {
3299 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
// Debug dump of a context edge (body elided; presumably prints to dbgs()
// via print() — TODO confirm against full source).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3304void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Pretty-prints a context edge: endpoint nodes, a "(BE)" marker for
// backedges, and the sorted context ids carried by the edge.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3311void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3312 raw_ostream &OS)
const {
3313 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3314 << (IsBackedge ?
" (BE)" :
"")
3316 OS <<
" ContextIds:";
// Sort ids so output is stable across runs.
3317 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3318 std::sort(SortedIds.begin(), SortedIds.end());
3319 for (
auto Id : SortedIds)
// Debug dump of the whole graph (body elided in this extract).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3324void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live (non-removed) node of the graph, iterating via the
// GraphTraits interface (iteration statements elided in this extract).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3329void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3330 raw_ostream &OS)
const {
3331 OS <<
"Callsite Context Graph:\n";
3332 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Skip nodes that have been removed from the graph.
3334 if (
Node->isRemoved())
// Emits per-context size remarks for every allocation node: for each context
// id (sorted for determinism) with recorded size info, reports the full
// stack id, total size and allocation type, noting when the type used
// differs from the call's attribute due to the cold byte percent threshold.
// The EmitRemark callback receives (debug-type, remark-name, message).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3342void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3344 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark)
const {
3345 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Only live allocation nodes are reported.
3347 if (
Node->isRemoved())
3349 if (!
Node->IsAllocation)
3351 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3352 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3353 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3354 std::sort(SortedIds.begin(), SortedIds.end());
3355 for (
auto Id : SortedIds) {
3356 auto TypeI = ContextIdToAllocationType.find(Id);
3357 assert(TypeI != ContextIdToAllocationType.end());
3358 auto CSI = ContextIdToContextSizeInfos.find(Id);
// Context ids may have several recorded (stack id, size) entries.
3359 if (CSI != ContextIdToContextSizeInfos.end()) {
3360 for (
auto &Info : CSI->second) {
3363 " full allocation context " + std::to_string(
Info.FullStackId) +
3364 " with total size " + std::to_string(
Info.TotalSize) +
" is " +
// Flag contexts whose effective type was overridden by the cold
// byte percent heuristic.
3366 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3368 " due to cold byte percent";
3370 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3374 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
// Fallback report when no size info was recorded for the id.
3381 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3383 " due to cold byte percent";
3385 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3389 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
// Graph consistency check: iterates all nodes and their caller edges
// (per-edge validation statements elided in this extract).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3396void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3397 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3400 for (
auto &
Edge :
Node->CallerEdges)
// GraphTraits specialization fragment enabling generic graph iteration over
// the CallsiteContextGraph (struct header and most members elided in this
// extract; only type aliases and the entry-node accessor remain visible).
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3407 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3408 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3410 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
// Entry node: the first node owned by the graph.
3426 return G->NodeOwner.begin()->get();
3429 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3430 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
// DOTGraphTraits specialization fragment: node/edge label and attribute
// generation for exportToDot. Nodes are labeled with their original stack /
// alloc id, call label(s) and flags; colors encode allocation type (notcold
// red, cold blue, both purple) with optional highlighting; removed nodes
// are hidden. (Struct header and several helpers are elided in this
// extract.)
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3463 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Build the node label: orig id, call label(s) or "null call", plus
// recursive/external markers.
3469 std::string LabelString =
3470 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3473 LabelString +=
"\n";
3474 if (
Node->hasCall()) {
3475 auto Func =
G->NodeToCallingFunc.find(
Node);
3476 assert(Func !=
G->NodeToCallingFunc.end());
3478 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3479 for (
auto &MatchingCall :
Node->MatchingCalls) {
3480 LabelString +=
"\n";
3481 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3482 MatchingCall.cloneNo());
3485 LabelString +=
"null call";
3486 if (
Node->Recursive)
3487 LabelString +=
" (recursive)";
3489 LabelString +=
" (external)";
// Node attributes: tooltip with context ids, fill color by alloc type,
// dashed blue outline for clones.
3495 auto ContextIds =
Node->getContextIds();
3499 bool Highlight =
false;
3508 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3509 getContextIds(ContextIds) +
"\"")
3513 AttributeString +=
",fontsize=\"30\"";
3515 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3517 if (
Node->CloneOf) {
3518 AttributeString +=
",color=\"blue\"";
3519 AttributeString +=
",style=\"filled,bold,dashed\"";
3521 AttributeString +=
",style=\"filled\"";
3522 return AttributeString;
// Edge attributes: tooltip with context ids, color by alloc type, dotted
// style for backedges, heavier pen for highlighted edges.
3527 auto &Edge = *(ChildIter.getCurrent());
3532 bool Highlight =
false;
3541 auto Color = getColor(Edge->AllocTypes, Highlight);
3542 std::string AttributeString =
3543 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3545 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3548 if (Edge->IsBackedge)
3549 AttributeString +=
",style=\"dotted\"";
3552 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3553 return AttributeString;
// Removed nodes are filtered out of the rendered graph.
3559 if (
Node->isRemoved())
// Render context ids inline when few; otherwise just the count.
3572 std::string IdString =
"ContextIds:";
3573 if (ContextIds.
size() < 100) {
3574 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3575 std::sort(SortedIds.begin(), SortedIds.end());
3576 for (
auto Id : SortedIds)
3577 IdString += (
" " +
Twine(Id)).str();
3579 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
// Color scheme per allocation type; muted variants when highlighting is
// enabled and this element is not highlighted.
3584 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3590 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3592 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3593 if (AllocTypes == (uint8_t)AllocationType::Cold)
3594 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3596 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3597 return Highlight ?
"magenta" :
"mediumorchid1";
// Unique DOT node id derived from the node's address.
3601 static std::string getNodeId(NodeRef Node) {
3602 std::stringstream SStream;
3603 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3604 std::string
Result = SStream.str();
// NOTE(review): the first template header below belongs to an adjacent
// definition whose body was elided by the extraction.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
// Writes the graph to a DOT file whose name incorporates Label (body elided
// in this extract).
3618template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3619void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3620 std::string Label)
const {
// Clones the edge's callee node and moves the edge (restricted to
// ContextIdsToMove) onto the new clone. The clone copies the original's
// allocation flag, calling function, call and matching calls, and is
// registered via addClone; the heavy lifting is delegated to
// moveEdgeToExistingCalleeClone with NewClone=true.
template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3626typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3627CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3628 const std::shared_ptr<ContextEdge> &
Edge,
3629 DenseSet<uint32_t> ContextIdsToMove) {
3631 assert(NodeToCallingFunc.count(Node));
3632 ContextNode *Clone =
3633 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3634 Node->addClone(Clone);
3635 Clone->MatchingCalls =
Node->MatchingCalls;
3636 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3641template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3642void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3643 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3644 ContextNode *NewCallee,
bool NewClone,
3645 DenseSet<uint32_t> ContextIdsToMove) {
3648 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3650 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3652 ContextNode *OldCallee =
Edge->Callee;
3656 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3660 if (ContextIdsToMove.
empty())
3661 ContextIdsToMove =
Edge->getContextIds();
3665 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3668 NewCallee->AllocTypes |=
Edge->AllocTypes;
3670 if (ExistingEdgeToNewCallee) {
3673 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3674 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3675 assert(
Edge->ContextIds == ContextIdsToMove);
3676 removeEdgeFromGraph(
Edge.get());
3679 Edge->Callee = NewCallee;
3680 NewCallee->CallerEdges.push_back(
Edge);
3682 OldCallee->eraseCallerEdge(
Edge.get());
3689 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3690 if (ExistingEdgeToNewCallee) {
3693 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3694 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3697 auto NewEdge = std::make_shared<ContextEdge>(
3698 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3699 Edge->Caller->CalleeEdges.push_back(NewEdge);
3700 NewCallee->CallerEdges.push_back(NewEdge);
3704 NewCallee->AllocTypes |= CallerEdgeAllocType;
3706 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3711 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3712 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3716 if (CalleeToUse == OldCallee) {
3720 if (EdgeIsRecursive) {
3724 CalleeToUse = NewCallee;
3728 DenseSet<uint32_t> EdgeContextIdsToMove =
3730 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3731 OldCalleeEdge->AllocTypes =
3732 computeAllocType(OldCalleeEdge->getContextIds());
3739 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3740 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3741 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3745 auto NewEdge = std::make_shared<ContextEdge>(
3746 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3747 EdgeContextIdsToMove);
3748 NewCallee->CalleeEdges.push_back(NewEdge);
3749 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3753 OldCallee->AllocTypes = OldCallee->computeAllocType();
3755 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3756 OldCallee->emptyContextIds());
3760 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3763 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3769template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3770void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3771 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3772 ContextNode *NewCaller) {
3773 auto *OldCallee =
Edge->Callee;
3774 auto *NewCallee = OldCallee;
3777 bool Recursive =
Edge->Caller ==
Edge->Callee;
3779 NewCallee = NewCaller;
3781 ContextNode *OldCaller =
Edge->Caller;
3782 OldCaller->eraseCalleeEdge(
Edge.get());
3786 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3788 if (ExistingEdgeToNewCaller) {
3791 ExistingEdgeToNewCaller->getContextIds().insert_range(
3792 Edge->getContextIds());
3793 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3794 Edge->ContextIds.clear();
3795 Edge->AllocTypes = (uint8_t)AllocationType::None;
3796 OldCallee->eraseCallerEdge(
Edge.get());
3799 Edge->Caller = NewCaller;
3800 NewCaller->CalleeEdges.push_back(
Edge);
3802 assert(NewCallee == NewCaller);
3805 Edge->Callee = NewCallee;
3806 NewCallee->CallerEdges.push_back(
Edge);
3807 OldCallee->eraseCallerEdge(
Edge.get());
3813 NewCaller->AllocTypes |=
Edge->AllocTypes;
3820 bool IsNewNode = NewCaller->CallerEdges.empty();
3829 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3830 auto OldCallerCaller = OldCallerEdge->Caller;
3834 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3835 if (OldCaller == OldCallerCaller) {
3836 OldCallerCaller = NewCaller;
3842 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3843 OldCallerEdge->AllocTypes =
3844 computeAllocType(OldCallerEdge->getContextIds());
3849 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3853 if (ExistingCallerEdge) {
3854 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3855 ExistingCallerEdge->AllocTypes |=
3856 computeAllocType(EdgeContextIdsToMove);
3859 auto NewEdge = std::make_shared<ContextEdge>(
3860 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3861 EdgeContextIdsToMove);
3862 NewCaller->CallerEdges.push_back(NewEdge);
3863 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3868 OldCaller->AllocTypes = OldCaller->computeAllocType();
3870 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3871 OldCaller->emptyContextIds());
3875 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3878 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3884template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3885void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3886 recursivelyRemoveNoneTypeCalleeEdges(
3887 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3892 removeNoneTypeCalleeEdges(Node);
3894 for (
auto *Clone :
Node->Clones)
3895 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3899 auto CallerEdges =
Node->CallerEdges;
3900 for (
auto &
Edge : CallerEdges) {
3902 if (
Edge->isRemoved()) {
3906 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3911template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3912void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3917 DenseSet<const ContextNode *> Visited;
3918 DenseSet<const ContextNode *> CurrentStack;
3919 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3921 if (
Node->isRemoved())
3924 if (!
Node->CallerEdges.empty())
3926 markBackedges(Node, Visited, CurrentStack);
3932template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3933void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3934 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3935 DenseSet<const ContextNode *> &CurrentStack) {
3936 auto I = Visited.
insert(Node);
3940 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3941 auto *
Callee = CalleeEdge->Callee;
3942 if (Visited.
count(Callee)) {
3945 if (CurrentStack.
count(Callee))
3946 CalleeEdge->IsBackedge =
true;
3949 CurrentStack.
insert(Callee);
3950 markBackedges(Callee, Visited, CurrentStack);
3951 CurrentStack.
erase(Callee);
3955template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3956void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3957 DenseSet<const ContextNode *> Visited;
3958 for (
auto &Entry : AllocationCallToContextNodeMap) {
3960 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3963 for (
auto &Entry : AllocationCallToContextNodeMap)
3964 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3977template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3978void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3979 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3980 const DenseSet<uint32_t> &AllocContextIds) {
3990 if (!
Node->hasCall())
4009 auto CallerEdges =
Node->CallerEdges;
4010 for (
auto &
Edge : CallerEdges) {
4012 if (
Edge->isRemoved()) {
4018 if (
Edge->IsBackedge) {
4025 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4026 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4049 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4053 [&](
const std::shared_ptr<ContextEdge> &
A,
4054 const std::shared_ptr<ContextEdge> &
B) {
4057 if (A->ContextIds.empty())
4063 if (B->ContextIds.empty())
4066 if (A->AllocTypes == B->AllocTypes)
4069 return *A->ContextIds.begin() < *B->ContextIds.begin();
4070 return AllocTypeCloningPriority[A->AllocTypes] <
4071 AllocTypeCloningPriority[B->AllocTypes];
4074 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4076 DenseSet<uint32_t> RecursiveContextIds;
4081 DenseSet<uint32_t> AllCallerContextIds;
4082 for (
auto &CE :
Node->CallerEdges) {
4085 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4086 for (
auto Id :
CE->getContextIds())
4087 if (!AllCallerContextIds.
insert(Id).second)
4088 RecursiveContextIds.
insert(Id);
4098 auto CallerEdges =
Node->CallerEdges;
4099 for (
auto &CallerEdge : CallerEdges) {
4101 if (CallerEdge->isRemoved()) {
4105 assert(CallerEdge->Callee == Node);
4114 if (!CallerEdge->Caller->hasCall())
4119 auto CallerEdgeContextsForAlloc =
4121 if (!RecursiveContextIds.
empty())
4122 CallerEdgeContextsForAlloc =
4124 if (CallerEdgeContextsForAlloc.empty())
4127 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4131 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4132 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4133 for (
auto &CalleeEdge :
Node->CalleeEdges)
4134 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4135 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4151 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4152 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4153 if (!CallerEdge->IsBackedge &&
4154 allocTypeToUse(CallerAllocTypeForAlloc) ==
4155 allocTypeToUse(
Node->AllocTypes) &&
4156 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4157 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4161 if (CallerEdge->IsBackedge) {
4165 DeferredBackedges++;
4178 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4179 !Visited.
count(CallerEdge->Caller)) {
4180 const auto OrigIdCount = CallerEdge->getContextIds().size();
4183 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4184 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4188 bool UpdatedEdge =
false;
4189 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4190 for (
auto E :
Node->CallerEdges) {
4192 if (
E->Caller->CloneOf != CallerEdge->Caller)
4196 auto CallerEdgeContextsForAllocNew =
4198 if (CallerEdgeContextsForAllocNew.empty())
4208 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4218 if (CallerEdge->isRemoved())
4228 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4229 if (CallerEdgeContextsForAlloc.empty())
4234 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4235 CalleeEdgeAllocTypesForCallerEdge.clear();
4236 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4237 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4238 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4244 ContextNode *Clone =
nullptr;
4245 for (
auto *CurClone :
Node->Clones) {
4246 if (allocTypeToUse(CurClone->AllocTypes) !=
4247 allocTypeToUse(CallerAllocTypeForAlloc))
4254 assert(!BothSingleAlloc ||
4255 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4261 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4262 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4270 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4271 CallerEdgeContextsForAlloc);
4273 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4276 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4283 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4289void ModuleCallsiteContextGraph::updateAllocationCall(
4294 "memprof", AllocTypeString);
4297 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4298 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4300 <<
" marked with memprof allocation attribute "
4301 <<
ore::NV(
"Attribute", AllocTypeString));
4304void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4308 assert(AI->Versions.size() >
Call.cloneNo());
4313ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4315 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4316 return AllocationType::None;
4317 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4318 ? AllocationType::Cold
4319 : AllocationType::NotCold;
4323IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4325 assert(AI->Versions.size() >
Call.cloneNo());
4329void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4330 FuncInfo CalleeFunc) {
4331 auto *CurF = getCalleeFunc(CallerCall.call());
4332 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4339 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4341 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4343 MismatchedCloneAssignments++;
4346 if (NewCalleeCloneNo > 0)
4347 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4348 OREGetter(CallerCall.call()->getFunction())
4349 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4350 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4351 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4352 <<
" assigned to call function clone "
4353 <<
ore::NV(
"Callee", CalleeFunc.func()));
4356void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4357 FuncInfo CalleeFunc) {
4360 "Caller cannot be an allocation which should not have profiled calls");
4361 assert(CI->Clones.size() > CallerCall.cloneNo());
4362 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4363 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4368 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4370 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4372 MismatchedCloneAssignments++;
4374 CurCalleeCloneNo = NewCalleeCloneNo;
4386 SP->replaceLinkageName(MDName);
4390 TempDISubprogram NewDecl = Decl->
clone();
4391 NewDecl->replaceLinkageName(MDName);
4395CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4397ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4398 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4399 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4404 assert(!
Func.func()->getParent()->getFunction(Name));
4405 NewFunc->setName(Name);
4407 for (
auto &Inst : CallsWithMetadataInFunc) {
4409 assert(Inst.cloneNo() == 0);
4412 OREGetter(
Func.func())
4413 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4414 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4415 return {NewFunc, CloneNo};
4418CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4419 IndexCall>::FuncInfo
4420IndexCallsiteContextGraph::cloneFunctionForCallsite(
4421 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4422 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4436 for (
auto &Inst : CallsWithMetadataInFunc) {
4438 assert(Inst.cloneNo() == 0);
4440 assert(AI->Versions.size() == CloneNo);
4443 AI->Versions.push_back(0);
4446 assert(CI && CI->Clones.size() == CloneNo);
4449 CI->Clones.push_back(0);
4451 CallMap[Inst] = {Inst.call(), CloneNo};
4453 return {
Func.func(), CloneNo};
4470template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4471void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4477 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4478 for (
auto &Entry : AllocationCallToContextNodeMap) {
4480 for (
auto Id :
Node->getContextIds())
4481 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4482 for (
auto *Clone :
Node->Clones) {
4483 for (
auto Id : Clone->getContextIds())
4484 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4491 DenseSet<const ContextNode *> Visited;
4492 for (
auto &Entry : AllocationCallToContextNodeMap) {
4495 mergeClones(Node, Visited, ContextIdToAllocationNode);
4501 auto Clones =
Node->Clones;
4502 for (
auto *Clone : Clones)
4503 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4507 dbgs() <<
"CCG after merging:\n";
4511 exportToDot(
"aftermerge");
4519template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4520void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4521 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4522 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4532 bool FoundUnvisited =
true;
4534 while (FoundUnvisited) {
4536 FoundUnvisited =
false;
4539 auto CallerEdges =
Node->CallerEdges;
4540 for (
auto CallerEdge : CallerEdges) {
4542 if (CallerEdge->Callee != Node)
4547 FoundUnvisited =
true;
4548 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4552 TotalMergeInvokes++;
4553 TotalMergeIters += Iters;
4554 if (Iters > MaxMergeIters)
4555 MaxMergeIters = Iters;
4558 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4561template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4562void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4563 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4564 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4566 if (
Node->emptyContextIds())
4571 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4572 OrigNodeToCloneEdges;
4573 for (
const auto &
E :
Node->CalleeEdges) {
4578 OrigNodeToCloneEdges[
Base].push_back(
E);
4584 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4585 const std::shared_ptr<ContextEdge> &
B) {
4586 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4587 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4588 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4590 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4594 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4599 for (
auto Entry : OrigNodeToCloneEdges) {
4602 auto &CalleeEdges =
Entry.second;
4603 auto NumCalleeClones = CalleeEdges.size();
4605 if (NumCalleeClones == 1)
4616 DenseSet<ContextNode *> OtherCallersToShareMerge;
4617 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4618 OtherCallersToShareMerge);
4623 ContextNode *MergeNode =
nullptr;
4624 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4625 for (
auto CalleeEdge : CalleeEdges) {
4626 auto *OrigCallee = CalleeEdge->Callee;
4632 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4633 MergeNode = OrigCallee;
4634 NonNewMergedNodes++;
4641 if (!OtherCallersToShareMerge.
empty()) {
4642 bool MoveAllCallerEdges =
true;
4643 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4644 if (CalleeCallerE == CalleeEdge)
4646 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4647 MoveAllCallerEdges =
false;
4653 if (MoveAllCallerEdges) {
4654 MergeNode = OrigCallee;
4655 NonNewMergedNodes++;
4662 assert(MergeNode != OrigCallee);
4663 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4666 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4671 if (!OtherCallersToShareMerge.
empty()) {
4675 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4676 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4677 if (CalleeCallerE == CalleeEdge)
4679 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4681 CallerToMoveCount[CalleeCallerE->Caller]++;
4682 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4686 removeNoneTypeCalleeEdges(OrigCallee);
4687 removeNoneTypeCalleeEdges(MergeNode);
4705template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4706void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4707 findOtherCallersToShareMerge(
4709 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4710 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4711 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4712 auto NumCalleeClones = CalleeEdges.size();
4715 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4718 unsigned PossibleOtherCallerNodes = 0;
4722 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4728 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4729 for (
auto CalleeEdge : CalleeEdges) {
4730 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4733 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4734 if (CalleeCallerEdges->Caller == Node) {
4735 assert(CalleeCallerEdges == CalleeEdge);
4738 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4741 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4743 PossibleOtherCallerNodes++;
4747 for (
auto Id : CalleeEdge->getContextIds()) {
4748 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4752 MissingAllocForContextId++;
4755 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4762 for (
auto CalleeEdge : CalleeEdges) {
4763 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4765 if (!PossibleOtherCallerNodes)
4767 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4769 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4771 if (CalleeCallerE == CalleeEdge)
4775 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4780 for (
auto Id : CalleeCallerE->getContextIds()) {
4781 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4786 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4787 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4788 PossibleOtherCallerNodes--;
4795 if (!PossibleOtherCallerNodes)
4800 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4801 if (
Count != NumCalleeClones)
4803 OtherCallersToShareMerge.
insert(OtherCaller);
4848template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4849bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4856 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4860 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4861 const FuncInfo &CalleeFunc) {
4863 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4867 struct FuncCloneInfo {
4872 DenseMap<CallInfo, CallInfo> CallMap;
4900 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4901 UnassignedCallClones;
4905 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4906 FuncInfo OrigFunc(Func);
4911 std::vector<FuncCloneInfo> FuncCloneInfos;
4912 for (
auto &
Call : CallsWithMetadata) {
4913 ContextNode *
Node = getNodeForInst(
Call);
4917 if (!Node ||
Node->Clones.empty())
4920 "Not having a call should have prevented cloning");
4924 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4928 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4930 ContextNode *CallsiteClone,
4933 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4935 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4936 DenseMap<CallInfo, CallInfo> &CallMap =
4937 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4938 CallInfo CallClone(
Call);
4939 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4940 CallClone = It->second;
4941 CallsiteClone->setCall(CallClone);
4943 for (
auto &MatchingCall :
Node->MatchingCalls) {
4944 CallInfo CallClone(MatchingCall);
4945 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4946 CallClone = It->second;
4948 MatchingCall = CallClone;
4956 auto MoveEdgeToNewCalleeCloneAndSetUp =
4957 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4958 ContextNode *OrigCallee =
Edge->Callee;
4959 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4960 removeNoneTypeCalleeEdges(NewClone);
4961 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4965 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4966 RecordCalleeFuncOfCallsite(
4967 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4974 std::deque<ContextNode *> ClonesWorklist;
4976 if (!
Node->emptyContextIds())
4977 ClonesWorklist.push_back(Node);
4983 unsigned NodeCloneCount = 0;
4984 while (!ClonesWorklist.empty()) {
4985 ContextNode *Clone = ClonesWorklist.front();
4986 ClonesWorklist.pop_front();
4995 if (FuncCloneInfos.size() < NodeCloneCount) {
4997 if (NodeCloneCount == 1) {
5002 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5003 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5007 FuncCloneInfos.push_back(
5008 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
5009 AssignCallsiteCloneToFuncClone(
5010 OrigFunc,
Call, Clone,
5011 AllocationCallToContextNodeMap.count(
Call));
5012 for (
auto &CE : Clone->CallerEdges) {
5014 if (!
CE->Caller->hasCall())
5016 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5026 FuncInfo PreviousAssignedFuncClone;
5028 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5029 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5031 bool CallerAssignedToCloneOfFunc =
false;
5032 if (EI != Clone->CallerEdges.end()) {
5033 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5034 PreviousAssignedFuncClone =
5035 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5036 CallerAssignedToCloneOfFunc =
true;
5041 DenseMap<CallInfo, CallInfo> NewCallMap;
5042 unsigned CloneNo = FuncCloneInfos.size();
5043 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5044 "should already exist in the map");
5045 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5046 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5047 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5048 FunctionClonesAnalysis++;
5054 if (!CallerAssignedToCloneOfFunc) {
5055 AssignCallsiteCloneToFuncClone(
5056 NewFuncClone,
Call, Clone,
5057 AllocationCallToContextNodeMap.count(
Call));
5058 for (
auto &CE : Clone->CallerEdges) {
5060 if (!
CE->Caller->hasCall())
5062 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5074 auto CallerEdges = Clone->CallerEdges;
5075 for (
auto CE : CallerEdges) {
5077 if (
CE->isRemoved()) {
5083 if (!
CE->Caller->hasCall())
5086 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5090 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5091 PreviousAssignedFuncClone)
5094 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5107 auto CalleeEdges =
CE->Caller->CalleeEdges;
5108 for (
auto CalleeEdge : CalleeEdges) {
5111 if (CalleeEdge->isRemoved()) {
5116 ContextNode *
Callee = CalleeEdge->Callee;
5120 if (Callee == Clone || !
Callee->hasCall())
5125 if (Callee == CalleeEdge->Caller)
5127 ContextNode *NewClone =
5128 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5131 removeNoneTypeCalleeEdges(Callee);
5139 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5140 OrigCall.setCloneNo(0);
5141 DenseMap<CallInfo, CallInfo> &CallMap =
5142 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5144 CallInfo NewCall(CallMap[OrigCall]);
5146 NewClone->setCall(NewCall);
5148 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5149 CallInfo OrigMatchingCall(MatchingCall);
5150 OrigMatchingCall.setCloneNo(0);
5152 CallInfo NewCall(CallMap[OrigMatchingCall]);
5155 MatchingCall = NewCall;
5164 auto FindFirstAvailFuncClone = [&]() {
5169 for (
auto &CF : FuncCloneInfos) {
5170 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5171 return CF.FuncClone;
5174 "Expected an available func clone for this callsite clone");
5191 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5192 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5196 auto CloneCallerEdges = Clone->CallerEdges;
5197 for (
auto &
Edge : CloneCallerEdges) {
5201 if (
Edge->isRemoved())
5204 if (!
Edge->Caller->hasCall())
5208 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5209 FuncInfo FuncCloneCalledByCaller =
5210 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5220 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5221 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5229 (FuncCloneAssignedToCurCallsiteClone &&
5230 FuncCloneAssignedToCurCallsiteClone !=
5231 FuncCloneCalledByCaller)) {
5246 if (FuncCloneToNewCallsiteCloneMap.count(
5247 FuncCloneCalledByCaller)) {
5248 ContextNode *NewClone =
5249 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5250 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5252 removeNoneTypeCalleeEdges(NewClone);
5255 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5256 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5259 ClonesWorklist.push_back(NewClone);
5263 removeNoneTypeCalleeEdges(Clone);
5271 if (!FuncCloneAssignedToCurCallsiteClone) {
5272 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5274 AssignCallsiteCloneToFuncClone(
5275 FuncCloneCalledByCaller,
Call, Clone,
5276 AllocationCallToContextNodeMap.count(
Call));
5280 assert(FuncCloneAssignedToCurCallsiteClone ==
5281 FuncCloneCalledByCaller);
5290 if (!FuncCloneAssignedToCurCallsiteClone) {
5291 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5292 assert(FuncCloneAssignedToCurCallsiteClone);
5294 AssignCallsiteCloneToFuncClone(
5295 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5296 AllocationCallToContextNodeMap.count(
Call));
5298 assert(FuncCloneToCurNodeCloneMap
5299 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5301 RecordCalleeFuncOfCallsite(
Edge->Caller,
5302 FuncCloneAssignedToCurCallsiteClone);
5322 if (!FuncCloneAssignedToCurCallsiteClone) {
5323 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5324 assert(FuncCloneAssignedToCurCallsiteClone &&
5325 "No available func clone for this callsite clone");
5326 AssignCallsiteCloneToFuncClone(
5327 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5328 AllocationCallToContextNodeMap.contains(
Call));
5333 for (
const auto &PE :
Node->CalleeEdges)
5335 for (
const auto &CE :
Node->CallerEdges)
5337 for (
auto *Clone :
Node->Clones) {
5339 for (
const auto &PE : Clone->CalleeEdges)
5341 for (
const auto &CE : Clone->CallerEdges)
5347 if (FuncCloneInfos.size() < 2)
5353 for (
auto &
Call : CallsWithMetadata) {
5354 ContextNode *
Node = getNodeForInst(
Call);
5355 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5361 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5365 DenseSet<unsigned> NodeCallClones;
5366 for (
auto *
C :
Node->Clones)
5367 NodeCallClones.
insert(
C->Call.cloneNo());
5370 for (
auto &FC : FuncCloneInfos) {
5375 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5380 auto &CallVector = UnassignedCallClones[
Node][
I];
5381 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5382 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5383 CallInfo CallClone = It->second;
5384 CallVector.push_back(CallClone);
5388 assert(
false &&
"Expected to find call in CallMap");
5391 for (
auto &MatchingCall :
Node->MatchingCalls) {
5392 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5393 CallInfo CallClone = It->second;
5394 CallVector.push_back(CallClone);
5398 assert(
false &&
"Expected to find call in CallMap");
5406 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5408 auto UpdateCalls = [&](ContextNode *
Node,
5409 DenseSet<const ContextNode *> &Visited,
5410 auto &&UpdateCalls) {
5411 auto Inserted = Visited.insert(Node);
5415 for (
auto *Clone :
Node->Clones)
5416 UpdateCalls(Clone, Visited, UpdateCalls);
5418 for (
auto &
Edge :
Node->CallerEdges)
5419 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5423 if (!
Node->hasCall() ||
Node->emptyContextIds())
5426 if (
Node->IsAllocation) {
5427 auto AT = allocTypeToUse(
Node->AllocTypes);
5433 !ContextIdToContextSizeInfos.empty()) {
5434 uint64_t TotalCold = 0;
5436 for (
auto Id :
Node->getContextIds()) {
5437 auto TypeI = ContextIdToAllocationType.find(Id);
5438 assert(TypeI != ContextIdToAllocationType.end());
5439 auto CSI = ContextIdToContextSizeInfos.find(Id);
5440 if (CSI != ContextIdToContextSizeInfos.end()) {
5441 for (
auto &Info : CSI->second) {
5443 if (TypeI->second == AllocationType::Cold)
5444 TotalCold +=
Info.TotalSize;
5449 AT = AllocationType::Cold;
5451 updateAllocationCall(
Node->Call, AT);
5456 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5459 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5460 updateCall(
Node->Call, CalleeFunc);
5462 for (
auto &
Call :
Node->MatchingCalls)
5463 updateCall(
Call, CalleeFunc);
5467 if (!UnassignedCallClones.
contains(Node))
5469 DenseSet<unsigned> NodeCallClones;
5470 for (
auto *
C :
Node->Clones)
5471 NodeCallClones.
insert(
C->Call.cloneNo());
5473 auto &ClonedCalls = UnassignedCallClones[
Node];
5474 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5478 if (NodeCallClones.
contains(CloneNo))
5481 for (
auto &
Call : CallVector)
5482 updateCall(
Call, CalleeFunc);
5491 DenseSet<const ContextNode *> Visited;
5492 for (
auto &Entry : AllocationCallToContextNodeMap)
5493 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5504 for (
auto &SN : FS->callsites()) {
5509 SN.Clones.size() >
I &&
5510 "Callsite summary has fewer entries than other summaries in function");
5511 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5518 for (
auto &AN : FS->allocs()) {
5522 assert(AN.Versions.size() >
I &&
5523 "Alloc summary has fewer entries than other summaries in function");
5524 if (AN.Versions.size() <=
I ||
5541 NewGV->takeName(DeclGV);
5548 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5549 if (!FuncToAliasMap.count(&
F))
5551 for (
auto *
A : FuncToAliasMap[&
F]) {
5553 auto *PrevA = M.getNamedAlias(AliasName);
5555 A->getType()->getPointerAddressSpace(),
5556 A->getLinkage(), AliasName, NewF);
5557 NewA->copyAttributesFrom(
A);
5559 TakeDeclNameAndReplace(PrevA, NewA);
5568 FunctionsClonedThinBackend++;
5585 for (
unsigned I = 1;
I < NumClones;
I++) {
5586 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5593 FunctionCloneDuplicatesThinBackend++;
5594 auto *Func = HashToFunc[Hash];
5595 if (Func->hasAvailableExternallyLinkage()) {
5601 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5603 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5606 auto *PrevF = M.getFunction(Name);
5609 TakeDeclNameAndReplace(PrevF, Alias);
5611 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5614 CloneFuncAliases(Func,
I);
5618 HashToFunc[Hash] = NewF;
5619 FunctionClonesThinBackend++;
5622 for (
auto &BB : *NewF) {
5623 for (
auto &Inst : BB) {
5624 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5625 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5630 TakeDeclNameAndReplace(PrevF, NewF);
5632 NewF->setName(Name);
5635 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5638 CloneFuncAliases(NewF,
I);
5647 const Function *CallingFunc =
nullptr) {
5666 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5672 if (!SrcFileMD &&
F.isDeclaration()) {
5676 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5681 assert(SrcFileMD || OrigName ==
F.getName());
5683 StringRef SrcFile = M.getSourceFileName();
5695 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5696 F.getName().contains(
'.')) {
5697 OrigName =
F.getName().rsplit(
'.').first;
5706 assert(TheFnVI ||
F.isDeclaration());
5710bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5712 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5713 Symtab = std::make_unique<InstrProfSymtab>();
5724 if (
Error E = Symtab->create(M,
true,
false)) {
5725 std::string SymtabFailure =
toString(std::move(
E));
5726 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5739 auto MIBIter = AllocNode.
MIBs.begin();
5740 for (
auto &MDOp : MemProfMD->
operands()) {
5742 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5747 auto ContextIterBegin =
5751 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5753 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5758 if (LastStackContextId == *ContextIter)
5760 LastStackContextId = *ContextIter;
5761 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5771bool MemProfContextDisambiguation::applyImport(
Module &M) {
5778 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5780 for (
auto &
A :
M.aliases()) {
5781 auto *Aliasee =
A.getAliaseeObject();
5783 FuncToAliasMap[
F].insert(&
A);
5786 if (!initializeIndirectCallPromotionInfo(M))
5793 OptimizationRemarkEmitter ORE(&
F);
5796 bool ClonesCreated =
false;
5797 unsigned NumClonesCreated = 0;
5798 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5808 if (ClonesCreated) {
5809 assert(NumClonesCreated == NumClones);
5816 ClonesCreated =
true;
5817 NumClonesCreated = NumClones;
5820 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5821 Function *CalledFunction, FunctionSummary *
FS) {
5823 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5835 if (CalledFunction != CB->getCalledOperand() &&
5836 (!GA || CalledFunction != GA->getAliaseeObject())) {
5837 SkippedCallsCloning++;
5843 auto CalleeOrigName = CalledFunction->getName();
5844 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5847 if (J > 0 && VMaps[J - 1]->
empty())
5851 if (!StackNode.
Clones[J])
5853 auto NewF =
M.getOrInsertFunction(
5855 CalledFunction->getFunctionType());
5869 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5870 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5872 <<
" assigned to call function clone "
5873 <<
ore::NV(
"Callee", NewF.getCallee()));
5887 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5891 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5893 "enable-import-metadata is needed to emit thinlto_src_module");
5894 StringRef SrcModule =
5897 if (GVS->modulePath() == SrcModule) {
5898 GVSummary = GVS.get();
5902 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5912 if (
FS->allocs().empty() &&
FS->callsites().empty())
5915 auto SI =
FS->callsites().begin();
5916 auto AI =
FS->allocs().begin();
5921 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5924 for (
auto CallsiteIt =
FS->callsites().rbegin();
5925 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5926 auto &Callsite = *CallsiteIt;
5930 if (!Callsite.StackIdIndices.empty())
5932 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5941 for (
auto &BB :
F) {
5942 for (
auto &
I : BB) {
5948 auto *CalledValue = CB->getCalledOperand();
5949 auto *CalledFunction = CB->getCalledFunction();
5950 if (CalledValue && !CalledFunction) {
5951 CalledValue = CalledValue->stripPointerCasts();
5958 assert(!CalledFunction &&
5959 "Expected null called function in callsite for alias");
5963 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5964 I.getMetadata(LLVMContext::MD_callsite));
5965 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5971 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5972 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5973 ? AllocTypeColdThinBackend++
5974 : AllocTypeNotColdThinBackend++;
5975 OrigAllocsThinBackend++;
5976 AllocVersionsThinBackend++;
5977 if (!MaxAllocVersionsThinBackend)
5978 MaxAllocVersionsThinBackend = 1;
5985 auto &AllocNode = *(AI++);
5993 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5995 OrigAllocsThinBackend++;
5996 AllocVersionsThinBackend += AllocNode.Versions.size();
5997 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5998 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
6008 if (AllocNode.Versions.size() == 1 &&
6011 AllocationType::NotCold ||
6013 AllocationType::None);
6014 UnclonableAllocsThinBackend++;
6020 return Type == ((uint8_t)AllocationType::NotCold |
6021 (uint8_t)AllocationType::Cold);
6025 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6028 if (J > 0 && VMaps[J - 1]->
empty())
6031 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6034 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6035 : AllocTypeNotColdThinBackend++;
6050 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6051 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6053 <<
" marked with memprof allocation attribute "
6054 <<
ore::NV(
"Attribute", AllocTypeString));
6056 }
else if (!CallsiteContext.empty()) {
6057 if (!CalledFunction) {
6061 assert(!CI || !CI->isInlineAsm());
6071 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6077 CloneFuncIfNeeded(NumClones, FS);
6082 assert(SI !=
FS->callsites().end());
6083 auto &StackNode = *(
SI++);
6089 for (
auto StackId : CallsiteContext) {
6091 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6097 CloneCallsite(StackNode, CB, CalledFunction, FS);
6099 }
else if (CB->isTailCall() && CalledFunction) {
6102 ValueInfo CalleeVI =
6104 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6105 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6106 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6107 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6114 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6124 for (
auto &BB :
F) {
6125 for (
auto &
I : BB) {
6128 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6129 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6137unsigned MemProfContextDisambiguation::recordICPInfo(
6142 uint32_t NumCandidates;
6143 uint64_t TotalCount;
6144 auto CandidateProfileData =
6145 ICallAnalysis->getPromotionCandidatesForInstruction(
6147 if (CandidateProfileData.empty())
6153 bool ICPNeeded =
false;
6154 unsigned NumClones = 0;
6155 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6156 for (
const auto &Candidate : CandidateProfileData) {
6158 auto CalleeValueInfo =
6160 ImportSummary->getValueInfo(Candidate.Value);
6163 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6165 auto &StackNode = *(
SI++);
6170 [](
unsigned CloneNo) { return CloneNo != 0; });
6180 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6181 TotalCount, CallsiteInfoStartIndex});
6185void MemProfContextDisambiguation::performICP(
6187 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6189 OptimizationRemarkEmitter &ORE) {
6196 for (
auto &Info : ICallAnalysisInfo) {
6199 auto TotalCount =
Info.TotalCount;
6200 unsigned NumPromoted = 0;
6201 unsigned NumClones = 0;
6203 for (
auto &Candidate :
Info.CandidateProfileData) {
6214 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6215 if (TargetFunction ==
nullptr ||
6223 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6224 <<
"Memprof cannot promote indirect call: target with md5sum "
6225 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6234 const char *Reason =
nullptr;
6237 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6238 <<
"Memprof cannot promote indirect call to "
6239 <<
ore::NV(
"TargetFunction", TargetFunction)
6240 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6251 CallBase *CBClone = CB;
6252 for (
unsigned J = 0; J < NumClones; J++) {
6255 if (J > 0 && VMaps[J - 1]->
empty())
6265 TotalCount, isSamplePGO, &ORE);
6266 auto *TargetToUse = TargetFunction;
6269 if (StackNode.
Clones[J]) {
6288 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6290 <<
" promoted and assigned to call function clone "
6291 <<
ore::NV(
"Callee", TargetToUse));
6295 TotalCount -= Candidate.Count;
6300 CallBase *CBClone = CB;
6301 for (
unsigned J = 0; J < NumClones; J++) {
6304 if (J > 0 && VMaps[J - 1]->
empty())
6310 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6313 if (TotalCount != 0)
6315 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6316 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6321template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6322bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
6323 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark,
6324 bool AllowExtraAnalysis) {
6326 dbgs() <<
"CCG before cloning:\n";
6330 exportToDot(
"postbuild");
6343 dbgs() <<
"CCG after cloning:\n";
6347 exportToDot(
"cloned");
6349 bool Changed = assignFunctions();
6352 dbgs() <<
"CCG after assigning function clones:\n";
6356 exportToDot(
"clonefuncassign");
6359 printTotalSizes(
errs(), EmitRemark);
6364bool MemProfContextDisambiguation::processModule(
6366 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6371 return applyImport(M);
6384 ModuleCallsiteContextGraph CCG(M, OREGetter);
6387 return CCG.process();
6392 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6397 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6401 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6405 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6406 "-memprof-dot-context-id");
6407 if (ImportSummary) {
6417 auto ReadSummaryFile =
6419 if (!ReadSummaryFile) {
6426 if (!ImportSummaryForTestingOrErr) {
6432 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6433 ImportSummary = ImportSummaryForTesting.get();
6442 if (!processModule(M, OREGetter))
6461 bool AllowExtraAnalysis =
6464 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6465 CCG.process(EmitRemark, AllowExtraAnalysis);
6480 for (
auto &BB :
F) {
6481 for (
auto &
I : BB) {
6485 if (CI->hasFnAttr(
"memprof")) {
6486 CI->removeFnAttr(
"memprof");
6489 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6490 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6496 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6497 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given value type T.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automatically convertible to raw pointers.