LLVM 23.0.0git
DeadStoreElimination.cpp
Go to the documentation of this file.
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
44#include "llvm/Analysis/Loads.h"
54#include "llvm/IR/Argument.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugInfo.h"
62#include "llvm/IR/Dominators.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/IRBuilder.h"
66#include "llvm/IR/InstrTypes.h"
67#include "llvm/IR/Instruction.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/PassManager.h"
73#include "llvm/IR/Value.h"
77#include "llvm/Support/Debug.h"
85#include <algorithm>
86#include <cassert>
87#include <cstdint>
88#include <map>
89#include <optional>
90#include <utility>
91
92using namespace llvm;
93using namespace PatternMatch;
94
95#define DEBUG_TYPE "dse"
96
97STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99STATISTIC(NumFastStores, "Number of stores deleted");
100STATISTIC(NumFastOther, "Number of other instrs removed");
101STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102STATISTIC(NumModifiedStores, "Number of stores modified");
103STATISTIC(NumCFGChecks, "Number of stores modified");
104STATISTIC(NumCFGTries, "Number of stores modified");
105STATISTIC(NumCFGSuccess, "Number of stores modified");
106STATISTIC(NumGetDomMemoryDefPassed,
107 "Number of times a valid candidate is returned from getDomMemoryDef");
108STATISTIC(NumDomMemDefChecks,
109 "Number iterations check for reads in getDomMemoryDef");
110
111DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112 "Controls which MemoryDefs are eliminated.");
113
114static cl::opt<bool>
115EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial-overwrite tracking in DSE"));
118
119static cl::opt<bool>
120EnablePartialStoreMerging("enable-dse-partial-store-merging",
121 cl::init(true), cl::Hidden,
122 cl::desc("Enable partial store merging in DSE"));
123
125 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126 cl::desc("The number of memory instructions to scan for "
127 "dead store elimination (default = 150)"));
129 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130 cl::desc("The maximum number of steps while walking upwards to find "
131 "MemoryDefs that may be killed (default = 90)"));
132
134 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135 cl::desc("The maximum number candidates that only partially overwrite the "
136 "killing MemoryDef to consider"
137 " (default = 5)"));
138
140 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142 "other stores per basic block (default = 5000)"));
143
145 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146 cl::desc(
147 "The cost of a step in the same basic block as the killing MemoryDef"
148 "(default = 1)"));
149
151 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
153 cl::desc("The cost of a step in a different basic "
154 "block than the killing MemoryDef"
155 "(default = 5)"));
156
158 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159 cl::desc("The maximum number of blocks to check when trying to prove that "
160 "all paths to an exit go through a killing block (default = 50)"));
161
162// This flags allows or disallows DSE to optimize MemorySSA during its
163// traversal. Note that DSE optimizing MemorySSA may impact other passes
164// downstream of the DSE invocation and can lead to issues not being
165// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166// those cases, the flag can be used to check if DSE's MemorySSA optimizations
167// impact follow-up passes.
168static cl::opt<bool>
169 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
170 cl::desc("Allow DSE to optimize memory accesses."));
171
172// TODO: remove this flag.
174 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
175 cl::desc("Enable the initializes attr improvement in DSE"));
176
177//===----------------------------------------------------------------------===//
178// Helper functions
179//===----------------------------------------------------------------------===//
180using OverlapIntervalsTy = std::map<int64_t, int64_t>;
182
183/// Returns true if the end of this instruction can be safely shortened in
184/// length.
186 // Don't shorten stores for now
187 if (isa<StoreInst>(I))
188 return false;
189
191 switch (II->getIntrinsicID()) {
192 default: return false;
193 case Intrinsic::memset:
194 case Intrinsic::memcpy:
195 case Intrinsic::memcpy_element_unordered_atomic:
196 case Intrinsic::memset_element_unordered_atomic:
197 // Do shorten memory intrinsics.
198 // FIXME: Add memmove if it's also safe to transform.
199 return true;
200 }
201 }
202
203 // Don't shorten libcalls calls for now.
204
205 return false;
206}
207
208/// Returns true if the beginning of this instruction can be safely shortened
209/// in length.
211 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
212 // easily done by offsetting the source address.
213 return isa<AnyMemSetInst>(I);
214}
215
216static std::optional<TypeSize> getPointerSize(const Value *V,
217 const DataLayout &DL,
218 const TargetLibraryInfo &TLI,
219 const Function *F) {
221 ObjectSizeOpts Opts;
223
224 if (getObjectSize(V, Size, DL, &TLI, Opts))
225 return TypeSize::getFixed(Size);
226 return std::nullopt;
227}
228
namespace {

// Classification of how a killing store's location relates to a dead store's
// location. Computed by isOverwrite/isPartialOverwrite/isMaskedStoreOverwrite.
enum OverwriteResult {
  OW_Begin,                       // Killing store overwrites the beginning of the dead store.
  OW_Complete,                    // Killing store completely overwrites the dead store.
  OW_End,                         // Killing store overwrites the end of the dead store.
  OW_PartialEarlierWithFullLater, // Smaller killing store lies fully inside the dead store.
  OW_MaybePartial,                // Overlap exists but its exact shape is not yet known.
  OW_None,                        // The two locations provably do not overlap.
  OW_Unknown                      // Nothing could be determined.
};

} // end anonymous namespace
242
243/// Check if two instruction are masked stores that completely
244/// overwrite one another. More specifically, \p KillingI has to
245/// overwrite \p DeadI.
246static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
247 const Instruction *DeadI,
249 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
250 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
251 if (KillingII == nullptr || DeadII == nullptr)
252 return OW_Unknown;
253 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
254 return OW_Unknown;
255
256 switch (KillingII->getIntrinsicID()) {
257 case Intrinsic::masked_store:
258 case Intrinsic::vp_store: {
259 const DataLayout &DL = KillingII->getDataLayout();
260 auto *KillingTy = KillingII->getArgOperand(0)->getType();
261 auto *DeadTy = DeadII->getArgOperand(0)->getType();
262 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
263 return OW_Unknown;
264 // Element count.
265 if (cast<VectorType>(KillingTy)->getElementCount() !=
266 cast<VectorType>(DeadTy)->getElementCount())
267 return OW_Unknown;
268 // Pointers.
269 Value *KillingPtr = KillingII->getArgOperand(1);
270 Value *DeadPtr = DeadII->getArgOperand(1);
271 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
272 return OW_Unknown;
273 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
274 // Masks.
275 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
276 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
277 return OW_Unknown;
278 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
279 // Masks.
280 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
281 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
282 return OW_Unknown;
283 // Lengths.
284 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
285 return OW_Unknown;
286 }
287 return OW_Complete;
288 }
289 default:
290 return OW_Unknown;
291 }
292}
293
294/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
295/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
296/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
297/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
298/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
299/// overwritten by a killing (smaller) store which doesn't write outside the big
300/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
301/// NOTE: This function must only be called if both \p KillingLoc and \p
302/// DeadLoc belong to the same underlying object with valid \p KillingOff and
303/// \p DeadOff.
304static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
305 const MemoryLocation &DeadLoc,
306 int64_t KillingOff, int64_t DeadOff,
307 Instruction *DeadI,
309 const uint64_t KillingSize = KillingLoc.Size.getValue();
310 const uint64_t DeadSize = DeadLoc.Size.getValue();
311 // We may now overlap, although the overlap is not complete. There might also
312 // be other incomplete overlaps, and together, they might cover the complete
313 // dead store.
314 // Note: The correctness of this logic depends on the fact that this function
315 // is not even called providing DepWrite when there are any intervening reads.
317 KillingOff < int64_t(DeadOff + DeadSize) &&
318 int64_t(KillingOff + KillingSize) >= DeadOff) {
319
320 // Insert our part of the overlap into the map.
321 auto &IM = IOL[DeadI];
322 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
323 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
324 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
325 << ")\n");
326
327 // Make sure that we only insert non-overlapping intervals and combine
328 // adjacent intervals. The intervals are stored in the map with the ending
329 // offset as the key (in the half-open sense) and the starting offset as
330 // the value.
331 int64_t KillingIntStart = KillingOff;
332 int64_t KillingIntEnd = KillingOff + KillingSize;
333
334 // Find any intervals ending at, or after, KillingIntStart which start
335 // before KillingIntEnd.
336 auto ILI = IM.lower_bound(KillingIntStart);
337 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
338 // This existing interval is overlapped with the current store somewhere
339 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
340 // intervals and adjusting our start and end.
341 KillingIntStart = std::min(KillingIntStart, ILI->second);
342 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
343 ILI = IM.erase(ILI);
344
345 // Continue erasing and adjusting our end in case other previous
346 // intervals are also overlapped with the current store.
347 //
348 // |--- dead 1 ---| |--- dead 2 ---|
349 // |------- killing---------|
350 //
351 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
352 assert(ILI->second > KillingIntStart && "Unexpected interval");
353 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
354 ILI = IM.erase(ILI);
355 }
356 }
357
358 IM[KillingIntEnd] = KillingIntStart;
359
360 ILI = IM.begin();
361 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
362 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
363 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
364 << ") Composite KillingLoc [" << ILI->second << ", "
365 << ILI->first << ")\n");
366 ++NumCompletePartials;
367 return OW_Complete;
368 }
369 }
370
371 // Check for a dead store which writes to all the memory locations that
372 // the killing store writes to.
373 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
374 int64_t(DeadOff + DeadSize) > KillingOff &&
375 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
376 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
377 << ", " << int64_t(DeadOff + DeadSize)
378 << ") by a killing store [" << KillingOff << ", "
379 << int64_t(KillingOff + KillingSize) << ")\n");
380 // TODO: Maybe come up with a better name?
381 return OW_PartialEarlierWithFullLater;
382 }
383
384 // Another interesting case is if the killing store overwrites the end of the
385 // dead store.
386 //
387 // |--dead--|
388 // |-- killing --|
389 //
390 // In this case we may want to trim the size of dead store to avoid
391 // generating stores to addresses which will definitely be overwritten killing
392 // store.
394 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
395 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
396 return OW_End;
397
398 // Finally, we also need to check if the killing store overwrites the
399 // beginning of the dead store.
400 //
401 // |--dead--|
402 // |-- killing --|
403 //
404 // In this case we may want to move the destination address and trim the size
405 // of dead store to avoid generating stores to addresses which will definitely
406 // be overwritten killing store.
408 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
409 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
410 "Expect to be handled as OW_Complete");
411 return OW_Begin;
412 }
413 // Otherwise, they don't completely overlap.
414 return OW_Unknown;
415}
416
417/// Returns true if the memory which is accessed by the second instruction is not
418/// modified between the first and the second instruction.
419/// Precondition: Second instruction must be dominated by the first
420/// instruction.
421static bool
424 DominatorTree *DT) {
425 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
426 // instructions which can modify the memory location accessed by SecondI.
427 //
428 // While doing the walk keep track of the address to check. It might be
429 // different in different basic blocks due to PHI translation.
430 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
432 // Keep track of the address we visited each block with. Bail out if we
433 // visit a block with different addresses.
435
436 BasicBlock::iterator FirstBBI(FirstI);
437 ++FirstBBI;
438 BasicBlock::iterator SecondBBI(SecondI);
439 BasicBlock *FirstBB = FirstI->getParent();
440 BasicBlock *SecondBB = SecondI->getParent();
441 MemoryLocation MemLoc;
442 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
443 MemLoc = MemoryLocation::getForDest(MemSet);
444 else
445 MemLoc = MemoryLocation::get(SecondI);
446
447 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
448
449 // Start checking the SecondBB.
450 WorkList.push_back(
451 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
452 bool isFirstBlock = true;
453
454 // Check all blocks going backward until we reach the FirstBB.
455 while (!WorkList.empty()) {
456 BlockAddressPair Current = WorkList.pop_back_val();
457 BasicBlock *B = Current.first;
458 PHITransAddr &Addr = Current.second;
459 Value *Ptr = Addr.getAddr();
460
461 // Ignore instructions before FirstI if this is the FirstBB.
462 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
463
465 if (isFirstBlock) {
466 // Ignore instructions after SecondI if this is the first visit of SecondBB.
467 assert(B == SecondBB && "first block is not the store block");
468 EI = SecondBBI;
469 isFirstBlock = false;
470 } else {
471 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
472 // In this case we also have to look at instructions after SecondI.
473 EI = B->end();
474 }
475 for (; BI != EI; ++BI) {
476 Instruction *I = &*BI;
477 if (I->mayWriteToMemory() && I != SecondI)
478 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
479 return false;
480 }
481 if (B != FirstBB) {
482 assert(B != &FirstBB->getParent()->getEntryBlock() &&
483 "Should not hit the entry block because SI must be dominated by LI");
484 for (BasicBlock *Pred : predecessors(B)) {
485 PHITransAddr PredAddr = Addr;
486 if (PredAddr.needsPHITranslationFromBlock(B)) {
487 if (!PredAddr.isPotentiallyPHITranslatable())
488 return false;
489 if (!PredAddr.translateValue(B, Pred, DT, false))
490 return false;
491 }
492 Value *TranslatedPtr = PredAddr.getAddr();
493 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
494 if (!Inserted.second) {
495 // We already visited this block before. If it was with a different
496 // address - bail out!
497 if (TranslatedPtr != Inserted.first->second)
498 return false;
499 // ... otherwise just skip it.
500 continue;
501 }
502 WorkList.push_back(std::make_pair(Pred, PredAddr));
503 }
504 }
505 }
506 return true;
507}
508
509static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
510 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
511 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
512 const DataLayout &DL = Inst->getDataLayout();
513 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
514 uint64_t DeadSliceOffsetInBits =
515 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
516 auto SetDeadFragExpr = [](auto *Assign,
517 DIExpression::FragmentInfo DeadFragment) {
518 // createFragmentExpression expects an offset relative to the existing
519 // fragment offset if there is one.
520 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
521 Assign->getExpression()
522 ->getFragmentInfo()
523 .value_or(DIExpression::FragmentInfo(0, 0))
524 .OffsetInBits;
526 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
527 Assign->setExpression(*NewExpr);
528 return;
529 }
530 // Failed to create a fragment expression for this so discard the value,
531 // making this a kill location.
533 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
534 DeadFragment.SizeInBits);
535 Assign->setExpression(Expr);
536 Assign->setKillLocation();
537 };
538
539 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
540 // link to any instructions. Created in the loop below (once).
541 DIAssignID *LinkToNothing = nullptr;
542 LLVMContext &Ctx = Inst->getContext();
543 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
544 if (!LinkToNothing)
545 LinkToNothing = DIAssignID::getDistinct(Ctx);
546 return LinkToNothing;
547 };
548
549 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
550 // overlapping dbg.assign intrinsic.
551 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
552 std::optional<DIExpression::FragmentInfo> NewFragment;
553 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
554 DeadSliceSizeInBits, Assign,
555 NewFragment) ||
556 !NewFragment) {
557 // We couldn't calculate the intersecting fragment for some reason. Be
558 // cautious and unlink the whole assignment from the store.
559 Assign->setKillAddress();
560 Assign->setAssignId(GetDeadLink());
561 continue;
562 }
563 // No intersect.
564 if (NewFragment->SizeInBits == 0)
565 continue;
566
567 // Fragments overlap: insert a new dbg.assign for this dead part.
568 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
569 NewAssign->insertAfter(Assign->getIterator());
570 NewAssign->setAssignId(GetDeadLink());
571 if (NewFragment)
572 SetDeadFragExpr(NewAssign, *NewFragment);
573 NewAssign->setKillAddress();
574 }
575}
576
577/// Update the attributes given that a memory access is updated (the
578/// dereferenced pointer could be moved forward when shortening a
579/// mem intrinsic).
580static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
581 uint64_t PtrOffset) {
582 // Remember old attributes.
583 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
584
585 // Find attributes that should be kept, and remove the rest.
586 AttributeMask AttrsToRemove;
587 for (auto &Attr : OldAttrs) {
588 if (Attr.hasKindAsEnum()) {
589 switch (Attr.getKindAsEnum()) {
590 default:
591 break;
592 case Attribute::Alignment:
593 // Only keep alignment if PtrOffset satisfy the alignment.
594 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
595 continue;
596 break;
597 case Attribute::Dereferenceable:
598 case Attribute::DereferenceableOrNull:
599 // We could reduce the size of these attributes according to
600 // PtrOffset. But we simply drop these for now.
601 break;
602 case Attribute::NonNull:
603 case Attribute::NoUndef:
604 continue;
605 }
606 }
607 AttrsToRemove.addAttribute(Attr);
608 }
609
610 // Remove the attributes that should be dropped.
611 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
612}
613
614static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
615 uint64_t &DeadSize, int64_t KillingStart,
616 uint64_t KillingSize, bool IsOverwriteEnd) {
617 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
618 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
619
620 // We assume that memet/memcpy operates in chunks of the "largest" native
621 // type size and aligned on the same value. That means optimal start and size
622 // of memset/memcpy should be modulo of preferred alignment of that type. That
623 // is it there is no any sense in trying to reduce store size any further
624 // since any "extra" stores comes for free anyway.
625 // On the other hand, maximum alignment we can achieve is limited by alignment
626 // of initial store.
627
628 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
629 // "largest" native type.
630 // Note: What is the proper way to get that value?
631 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
632 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
633
634 int64_t ToRemoveStart = 0;
635 uint64_t ToRemoveSize = 0;
636 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
637 // maintained on the remaining store.
638 if (IsOverwriteEnd) {
639 // Calculate required adjustment for 'KillingStart' in order to keep
640 // remaining store size aligned on 'PerfAlign'.
641 uint64_t Off =
642 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
643 ToRemoveStart = KillingStart + Off;
644 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
645 return false;
646 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
647 } else {
648 ToRemoveStart = DeadStart;
649 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
650 "Not overlapping accesses?");
651 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
652 // Calculate required adjustment for 'ToRemoveSize'in order to keep
653 // start of the remaining store aligned on 'PerfAlign'.
654 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
655 if (Off != 0) {
656 if (ToRemoveSize <= (PrefAlign.value() - Off))
657 return false;
658 ToRemoveSize -= PrefAlign.value() - Off;
659 }
660 assert(isAligned(PrefAlign, ToRemoveSize) &&
661 "Should preserve selected alignment");
662 }
663
664 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
665 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
666
667 uint64_t NewSize = DeadSize - ToRemoveSize;
668 if (DeadIntrinsic->isAtomic()) {
669 // When shortening an atomic memory intrinsic, the newly shortened
670 // length must remain an integer multiple of the element size.
671 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
672 if (0 != NewSize % ElementSize)
673 return false;
674 }
675
676 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
677 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
678 << "\n KILLER [" << ToRemoveStart << ", "
679 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
680
681 DeadIntrinsic->setLength(NewSize);
682 DeadIntrinsic->setDestAlignment(PrefAlign);
683
684 Value *OrigDest = DeadIntrinsic->getRawDest();
685 if (!IsOverwriteEnd) {
686 Value *Indices[1] = {
687 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
689 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
690 DeadI->getIterator());
691 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
692 DeadIntrinsic->setDest(NewDestGEP);
693 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
694 }
695
696 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
697 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
698 IsOverwriteEnd);
699
700 // Finally update start and size of dead access.
701 if (!IsOverwriteEnd)
702 DeadStart += ToRemoveSize;
703 DeadSize = NewSize;
704
705 return true;
706}
707
709 int64_t &DeadStart, uint64_t &DeadSize) {
710 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
711 return false;
712
713 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
714 int64_t KillingStart = OII->second;
715 uint64_t KillingSize = OII->first - KillingStart;
716
717 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
718
719 if (KillingStart > DeadStart &&
720 // Note: "KillingStart - KillingStart" is known to be positive due to
721 // preceding check.
722 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
723 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
724 // be non negative due to preceding checks.
725 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
726 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
727 true)) {
728 IntervalMap.erase(OII);
729 return true;
730 }
731 }
732 return false;
733}
734
737 int64_t &DeadStart, uint64_t &DeadSize) {
739 return false;
740
741 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
742 int64_t KillingStart = OII->second;
743 uint64_t KillingSize = OII->first - KillingStart;
744
745 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
746
747 if (KillingStart <= DeadStart &&
748 // Note: "DeadStart - KillingStart" is known to be non negative due to
749 // preceding check.
750 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
751 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
752 // be positive due to preceding checks.
753 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
754 "Should have been handled as OW_Complete");
755 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
756 false)) {
757 IntervalMap.erase(OII);
758 return true;
759 }
760 }
761 return false;
762}
763
764static Constant *
766 int64_t KillingOffset, int64_t DeadOffset,
768 DominatorTree *DT) {
769
770 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
771 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
772 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
773 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
774 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
775 // If the store we find is:
776 // a) partially overwritten by the store to 'Loc'
777 // b) the killing store is fully contained in the dead one and
778 // c) they both have a constant value
779 // d) none of the two stores need padding
780 // Merge the two stores, replacing the dead store's value with a
781 // merge of both values.
782 // TODO: Deal with other constant types (vectors, etc), and probably
783 // some mem intrinsics (if needed)
784
785 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
786 APInt KillingValue =
787 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
788 unsigned KillingBits = KillingValue.getBitWidth();
789 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
790 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
791
792 // Offset of the smaller store inside the larger store
793 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
794 unsigned LShiftAmount =
795 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
796 : BitOffsetDiff;
797 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
798 LShiftAmount + KillingBits);
799 // Clear the bits we'll be replacing, then OR with the smaller
800 // store, shifted appropriately.
801 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
802 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
803 << "\n Killing: " << *KillingI
804 << "\n Merged Value: " << Merged << '\n');
805 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
806 }
807 return nullptr;
808}
809
810// Returns true if \p I is an intrinsic that does not read or write memory.
813 switch (II->getIntrinsicID()) {
814 case Intrinsic::lifetime_start:
815 case Intrinsic::lifetime_end:
816 case Intrinsic::invariant_end:
817 case Intrinsic::launder_invariant_group:
818 case Intrinsic::assume:
819 return true;
820 case Intrinsic::dbg_declare:
821 case Intrinsic::dbg_label:
822 case Intrinsic::dbg_value:
823 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
824 default:
825 return false;
826 }
827 }
828 return false;
829}
830
831// Check if we can ignore \p D for DSE.
832static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
833 Instruction *DI = D->getMemoryInst();
834 // Calls that only access inaccessible memory cannot read or write any memory
835 // locations we consider for elimination.
836 if (auto *CB = dyn_cast<CallBase>(DI))
837 if (CB->onlyAccessesInaccessibleMemory())
838 return true;
839
840 // We can eliminate stores to locations not visible to the caller across
841 // throwing instructions.
842 if (DI->mayThrow() && !DefVisibleToCaller)
843 return true;
844
845 // We can remove the dead stores, irrespective of the fence and its ordering
846 // (release/acquire/seq_cst). Fences only constraints the ordering of
847 // already visible stores, it does not make a store visible to other
848 // threads. So, skipping over a fence does not change a store from being
849 // dead.
850 if (isa<FenceInst>(DI))
851 return true;
852
853 // Skip intrinsics that do not really read or modify memory.
854 if (isNoopIntrinsic(DI))
855 return true;
856
857 return false;
858}
859
860namespace {
861
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`. Caches the location's underlying object and the
// defining instruction at construction time.
struct MemoryLocationWrapper {
  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
                        bool DefByInitializesAttr)
      : MemLoc(MemLoc), MemDef(MemDef),
        DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should be not null");
    UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
    DefInst = MemDef->getMemoryInst();
  }

  // The location written by DefInst.
  MemoryLocation MemLoc;
  // Base object of MemLoc.Ptr, per getUnderlyingObject.
  const Value *UnderlyingObject;
  // The MemorySSA def covering this write.
  MemoryDef *MemDef;
  // The IR instruction behind MemDef.
  Instruction *DefInst;
  // True if this location is "written" via an `initializes` call-site
  // attribute rather than by a regular store.
  bool DefByInitializesAttr = false;
};
880
// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by this MemoryDef.
struct MemoryDefWrapper {
  MemoryDefWrapper(MemoryDef *MemDef,
                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
    DefInst = MemDef->getMemoryInst();
    // Wrap each written location; the bool marks initializes-attribute writes.
    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
      DefinedLocations.push_back(
          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
  }
  Instruction *DefInst;
  // NOTE(review): the declaration of `DefinedLocations` (used above) appears
  // to have been dropped in this copy of the file — verify against upstream.
};
894
struct ArgumentInitInfo {
  // Index of the argument in the call.
  unsigned Idx;
  // True if the argument's memory is dead, or invisible to the caller, on
  // unwind paths (see the unwind handling in the intersection helper below).
  bool IsDeadOrInvisibleOnUnwind;
  // Byte ranges from the argument's "initializes" attribute.
  ConstantRangeList Inits;
};
} // namespace
901
904 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
905}
906
// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias to each other.
// If any argument in "Args" doesn't have dead_on_unwind attr and
// "CallHasNoUnwindAttr" is false, return empty.
// NOTE(review): the first signature line (return type and the Args parameter)
// appears to have been dropped in this copy of the file — verify upstream.
                                  bool CallHasNoUnwindAttr) {
  if (Args.empty())
    return {};

  // To address unwind, the function should have nounwind attribute or the
  // arguments have dead or invisible on unwind. Otherwise, return empty.
  for (const auto &Arg : Args) {
    if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
      return {};
    // An argument with no initialized ranges makes the intersection empty.
    if (Arg.Inits.empty())
      return {};
  }

  // Fold all arguments' range lists into a single intersection.
  ConstantRangeList IntersectedIntervals = Args.front().Inits;
  for (auto &Arg : Args.drop_front())
    IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);

  return IntersectedIntervals;
}
932
namespace {

struct DSEState {
  // Analysis context for the function DSE runs on.
  Function &F;
  AliasAnalysis &AA;
  EarliestEscapeAnalysis EA;

  /// The single BatchAA instance that is used to cache AA queries. It will
  /// not be invalidated over the whole run. This is safe, because:
  /// 1. Only memory writes are removed, so the alias cache for memory
  ///    locations remains valid.
  /// 2. No new instructions are added (only instructions removed), so cached
  ///    information for a deleted value cannot be accessed by a re-used new
  ///    value pointer.
  BatchAAResults BatchAA;

  // Required analyses and module data layout.
  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;
  const DataLayout &DL;
  const LoopInfo &LI;

  // Whether the function contains any irreducible control flow, useful for
  // being accurately able to detect loops.
  bool ContainsIrreducibleLoops;

  // All MemoryDefs that potentially could kill other MemDefs.
  // NOTE(review): the member declaration itself (MemDefs, referenced by the
  // constructor) appears to have been dropped in this copy — verify upstream.
  // Any that should be skipped as they are already deleted
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track whether a given object is captured before return or not.
  DenseMap<const Value *, bool> CapturedBeforeReturn;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // Objects invisible to the caller only for a bounded byte range, mapped to
  // the number of dead bytes (populated from dead_on_return info in the ctor).
  DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
  /// basic block.
  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  // Check if there are root nodes that are terminated by UnreachableInst.
  // Those roots pessimize post-dominance queries. If there are such roots,
  // fall back to CFG scan starting from all non-unreachable roots.
  bool AnyUnreachableExit;

  // Whether or not we should iterate on removing dead stores at the end of the
  // function due to removing a store causing a previously captured pointer to
  // no longer be captured.
  bool ShouldIterateEndOfFunctionDSE;

  /// Dead instructions to be removed at the end of DSE.
  // NOTE(review): the member declaration itself (presumably a ToRemove
  // vector; see deleteDeadInstruction below) appears to have been dropped in
  // this copy — verify upstream.

  // Class contains self-reference, make sure it's not copied/moved.
  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI);
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;

  /// Returns a potentially more precise size for \p I's write, e.g. derived
  /// from the length argument of fortified (_chk) libcalls.
  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const;

  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  /// location (by \p DeadI instruction).
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff);

  /// Returns true if the object \p V is invisible to the caller after the
  /// function returns; \p Ptr and \p StoreSize identify the concrete store
  /// for the bounded dead-byte-range case.
  bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
                                   const LocationSize StoreSize);

  /// Returns true if the object \p V cannot be observed by the caller when
  /// the function unwinds.
  bool isInvisibleToCallerOnUnwind(const Value *V);

  /// Returns the single analyzable memory location written by \p I, if any.
  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;

  // Returns a list of <MemoryLocation, bool> pairs written by I.
  // The bool means whether the write is from Initializes attr.
  // NOTE(review): the return-type line appears to have been dropped in this
  // copy — verify upstream.
  getLocForInst(Instruction *I, bool ConsiderInitializesAttr);

  /// Assuming this instruction has a dead analyzable write, can we delete
  /// this instruction?
  bool isRemovable(Instruction *I);

  /// Returns true if \p UseInst completely overwrites \p DefLoc
  /// (stored by \p DefInst).
  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
                           Instruction *UseInst);

  /// Returns true if \p Def is not read before returning from the function.
  bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);

  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  /// pair with the MemoryLocation terminated by \p I and a boolean flag
  /// indicating whether \p I is a free-like call.
  std::optional<std::pair<MemoryLocation, bool>>
  getLocForTerminator(Instruction *I) const;

  /// Returns true if \p I is a memory terminator instruction like
  /// llvm.lifetime.end or free.
  bool isMemTerminatorInst(Instruction *I) const;

  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  /// instruction \p AccessI.
  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
                       Instruction *MaybeTerm);

  // Returns true if \p Use may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);

  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops that they are in. Either
  /// because they are known to be in the same block, in the same loop level or
  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
  /// during execution of the containing function.
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc);

  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
  /// loop. In particular, this guarantees that it only references a single
  /// MemoryLocation during execution of the containing function.
  bool isGuaranteedLoopInvariant(const Value *Ptr);

  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  // with no read access between them or on any other path to a function exit
  // block if \p KillingLoc is not accessible after the function returns. If
  // there is no such MemoryDef, return std::nullopt. The returned value may not
  // (completely) overwrite \p KillingLoc. Currently we bail out when we
  // encounter an aliasing MemoryUse (read).
  std::optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
                  bool IsMemTerm, unsigned &PartialLimit,
                  bool IsInitializesAttrMemLoc);

  /// Delete dead memory defs and recursively add their operands to ToRemove if
  /// they became dead.
  void
  deleteDeadInstruction(Instruction *SI,
                        SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);

  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
  // MemoryDef that may throw are handled during the walk from one def to the
  // next.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj);

  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  // instructions act as barriers:
  //  * A memory instruction that may throw and \p KillingI accesses a non-stack
  //    object.
  //  * Atomic stores stronger that monotonic.
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);

  /// Eliminate writes to objects that are not visible in the caller and are not
  /// accessed before returning from the function.
  bool eliminateDeadWritesAtEndOfFunction();

  /// If we have a zero initializing memset following a call to malloc,
  /// try folding it into a call to calloc.
  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);

  // Check if there is a dominating condition, that implies that the value
  // being stored in a ptr is already present in the ptr.
  bool dominatingConditionImpliesValue(MemoryDef *Def);

  /// \returns true if \p Def is a no-op store, either because it
  /// directly stores back a loaded value or stores zero to a calloced object.
  bool storeIsNoop(MemoryDef *Def, const Value *DefUO);

  /// Handle stores in \p IOL that are (partially) overwritten by other
  /// stores; presumably returns whether any change was made — body not
  /// visible in this chunk, confirm against the definition.
  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);

  /// Eliminates writes to locations where the value that is being written
  /// is already stored at the same location.
  bool eliminateRedundantStoresOfExistingValues();

  // Return the locations written by the initializes attribute.
  // Note that this function considers:
  // 1. Unwind edge: use "initializes" attribute only if the callee has
  //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
  //    or the argument is invisible to caller on unwind. That is, we don't
  //    perform incorrect DSE on unwind edges in the current function.
  // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
  //    the intersected range list of their "initializes" attributes.
  SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);

  // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
  // killed by `KillingLocWrapper.MemDef`. Return whether
  // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
  std::pair<bool, bool>
  eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);

  // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
  // change state: whether make any change.
  bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};

} // end anonymous namespace
1149
// Append every not-yet-visited user of \p Acc to the work list; Visited
// de-duplicates so each MemoryAccess is processed at most once.
// NOTE(review): the WorkList/Visited parameter lines appear to have been
// dropped in this copy of the signature — verify against upstream.
static void pushMemUses(MemoryAccess *Acc,
  for (Use &U : Acc->uses()) {
    auto *MA = cast<MemoryAccess>(U.getUser());
    if (Visited.insert(MA).second)
      WorkList.push_back(MA);
  }
}
1159
// Return true if "Arg" is function local and isn't captured before "CB".
// NOTE(review): a parameter line (the escape-analysis reference `EA`) and
// part of the return expression appear to have been dropped in this copy —
// verify against upstream.
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
  const Value *UnderlyingObj = getUnderlyingObject(Arg);
  // Identified function-local object (e.g. alloca) whose captures are
  // checked up to (and including, /*OrAt*/ true) the call CB.
  return isIdentifiedFunctionLocal(UnderlyingObj) &&
         EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
}
1168
// NOTE(review): a signature line (the DominatorTree/PostDominatorTree
// parameters referenced by the init list) appears to have been dropped in
// this copy — verify against upstream.
DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                   const TargetLibraryInfo &TLI, const LoopInfo &LI)
    : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
      TLI(TLI), DL(F.getDataLayout()), LI(LI) {
  // Collect blocks with throwing instructions not modeled in MemorySSA and
  // alloc-like objects.
  unsigned PO = 0;
  for (BasicBlock *BB : post_order(&F)) {
    PostOrderNumbers[BB] = PO++;
    for (Instruction &I : *BB) {
      MemoryAccess *MA = MSSA.getMemoryAccess(&I);
      // Throwing instructions without a MemoryAccess are not visited by the
      // MemorySSA walk; remember their blocks for mayThrowBetween.
      if (I.mayThrow() && !MA)
        ThrowingBlocks.insert(I.getParent());

      auto *MD = dyn_cast_or_null<MemoryDef>(MA);
      // NOTE(review): one line of this condition appears to have been
      // dropped in this copy — verify against upstream.
      if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
          (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
        MemDefs.push_back(MD);
    }
  }

  // Treat byval, inalloca or dead on return arguments the same as Allocas,
  // stores to them are dead at the end of the function.
  for (Argument &AI : F.args()) {
    if (AI.hasPassPointeeByValueCopyAttr()) {
      InvisibleToCallerAfterRet.insert({&AI, true});
      continue;
    }

    if (!AI.getType()->isPointerTy())
      continue;

    // dead_on_return: either the whole pointee is dead after return, or only
    // a bounded byte range is (tracked separately).
    const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
    if (Info.coversAllReachableMemory())
      InvisibleToCallerAfterRet.insert({&AI, true});
    else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
      InvisibleToCallerAfterRetBounded.insert({&AI, DeadBytes});
  }

  // Collect whether there is any irreducible control flow in the function.
  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);

  AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
    return isa<UnreachableInst>(E->getTerminator());
  });
}
1217
1218LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1219 LocationSize Size) const {
1220 if (auto *CB = dyn_cast<CallBase>(I)) {
1221 LibFunc F;
1222 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1223 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1224 // Use the precise location size specified by the 3rd argument
1225 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1226 // instruction. memset_chk will write either the amount specified as 3rd
1227 // argument or the function will immediately abort and exit the program.
1228 // NOTE: AA may determine NoAlias if it can prove that the access size
1229 // is larger than the allocation size due to that being UB. To avoid
1230 // returning potentially invalid NoAlias results by AA, limit the use of
1231 // the precise location size to isOverwrite.
1232 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1233 return LocationSize::precise(Len->getZExtValue());
1234 }
1235 }
1236 return Size;
1237}
1238
OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
                                      const Instruction *DeadI,
                                      const MemoryLocation &KillingLoc,
                                      const MemoryLocation &DeadLoc,
                                      int64_t &KillingOff, int64_t &DeadOff) {
  // AliasAnalysis does not always account for loops. Limit overwrite checks
  // to dependencies for which we can guarantee they are independent of any
  // loops they are in.
  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
    return OW_Unknown;

  // For _chk libcalls the 3rd argument may give a precise write size.
  LocationSize KillingLocSize =
      strengthenLocationSize(KillingI, KillingLoc.Size);
  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

  // Check whether the killing store overwrites the whole object, in which
  // case the size/offset of the dead store does not matter.
  if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
      isIdentifiedObject(KillingUndObj)) {
    std::optional<TypeSize> KillingUndObjSize =
        getPointerSize(KillingUndObj, DL, TLI, &F);
    if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
      return OW_Complete;
  }

  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  // get imprecise values here, though (except for unknown sizes).
  if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, try to use the IR values for the
    // number of bytes written and check if they match.
    const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
    const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
    if (KillingMemI && DeadMemI) {
      const Value *KillingV = KillingMemI->getLength();
      const Value *DeadV = DeadMemI->getLength();
      if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
        return OW_Complete;
    }

    // Masked stores have imprecise locations, but we can reason about them
    // to some extent.
    return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
  }

  const TypeSize KillingSize = KillingLocSize.getValue();
  const TypeSize DeadSize = DeadLoc.Size.getValue();
  // Bail on doing Size comparison which depends on AA for now
  // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
  const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();

  if (AnyScalable)
    return OW_Unknown;
  // Query the alias information
  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

  // If the start pointers are the same, we just have to compare sizes to see if
  // the killing store was larger than the dead store.
  if (AAR == AliasResult::MustAlias) {
    // Make sure that the KillingSize size is >= the DeadSize size.
    if (KillingSize >= DeadSize)
      return OW_Complete;
  }

  // If we hit a partial alias we may have a full overwrite
  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
    int32_t Off = AAR.getOffset();
    // Dead store lies at Off bytes into the killing store's range; complete
    // overwrite if its end also fits inside.
    if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
      return OW_Complete;
  }

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (DeadUndObj != KillingUndObj) {
    // Non aliasing stores to different objects don't overlap. Note that
    // if the killing store is known to overwrite whole object (out of
    // bounds access overwrites whole object as well) then it is assumed to
    // completely overwrite any store to the same object even if they don't
    // actually alias (see next check).
    if (AAR == AliasResult::NoAlias)
      return OW_None;
    return OW_Unknown;
  }

  // Okay, we have stores to two completely different pointers. Try to
  // decompose the pointer into a "base + constant_offset" form. If the base
  // pointers are equal, then we can reason about the two stores.
  DeadOff = 0;
  KillingOff = 0;
  const Value *DeadBasePtr =
      GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
  const Value *KillingBasePtr =
      GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

  // If the base pointers still differ, we have two completely different
  // stores.
  if (DeadBasePtr != KillingBasePtr)
    return OW_Unknown;

  // The killing access completely overlaps the dead store if and only if
  // both start and end of the dead one is "inside" the killing one:
  //    |<->|--dead--|<->|
  //    |-----killing------|
  // Accesses may overlap if and only if start of one of them is "inside"
  // another one:
  //    |<->|--dead--|<-------->|
  //    |-------killing--------|
  //           OR
  //    |-------dead-------|
  //    |<->|---killing---|<----->|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.

  // Check if the dead access starts "not before" the killing one.
  if (DeadOff >= KillingOff) {
    // If the dead access ends "not after" the killing access then the
    // dead one is completely overwritten by the killing one.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return OW_Complete;
    // If start of the dead access is "before" end of the killing access
    // then accesses overlap.
    else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  }
  // If start of the killing access is "before" end of the dead access then
  // accesses overlap.
  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
    return OW_MaybePartial;
  }

  // Can reach here only if accesses are known not to overlap.
  return OW_None;
}
1374
1375bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1376 const LocationSize StoreSize) {
1377 if (isa<AllocaInst>(V))
1378 return true;
1379
1380 auto IBounded = InvisibleToCallerAfterRetBounded.find(V);
1381 if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1382 int64_t ValueOffset;
1383 [[maybe_unused]] const Value *BaseValue =
1384 GetPointerBaseWithConstantOffset(Ptr, ValueOffset, DL);
1385 // If we are not able to find a constant offset from the UO, we have to
1386 // pessimistically assume that the store writes to memory out of the
1387 // dead_on_return bounds.
1388 if (BaseValue != V)
1389 return false;
1390 // This store is only invisible after return if we are in bounds of the
1391 // range marked dead.
1392 if (StoreSize.hasValue() &&
1393 ValueOffset + StoreSize.getValue() <= IBounded->second &&
1394 ValueOffset >= 0)
1395 return true;
1396 }
1397 auto I = InvisibleToCallerAfterRet.insert({V, false});
1398 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1399 I.first->second = capturesNothing(PointerMayBeCaptured(
1400 V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
1401 return I.first->second;
1402}
1403
1404bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1405 bool RequiresNoCaptureBeforeUnwind;
1406 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1407 return false;
1408 if (!RequiresNoCaptureBeforeUnwind)
1409 return true;
1410
1411 auto I = CapturedBeforeReturn.insert({V, true});
1412 if (I.second)
1413 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1414 // with the killing MemoryDef. But we refrain from doing so for now to
1415 // limit compile-time and this does not cause any changes to the number
1416 // of stores removed on a large test set in practice.
1417 I.first->second = capturesAnything(PointerMayBeCaptured(
1418 V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1419 return !I.first->second;
1420}
1421
std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
  // Only instructions that may write have an analyzable write location.
  if (!I->mayWriteToMemory())
    return std::nullopt;

  // Calls: single destination location, if the call has one.
  if (auto *CB = dyn_cast<CallBase>(I))
    return MemoryLocation::getForDest(CB, TLI);

  // NOTE(review): the final return for non-call instructions appears to have
  // been dropped in this copy of the file — verify against upstream.
}
1431
// Returns every <MemoryLocation, FromInitializesAttr> pair written by I.
// NOTE(review): the return-type line and the local declaration of
// `Locations` appear to have been dropped in this copy — verify upstream.
DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
  // Memory terminators (lifetime_end / free-like calls) contribute a single
  // non-initializes location.
  if (isMemTerminatorInst(I)) {
    if (auto Loc = getLocForTerminator(I))
      Locations.push_back(std::make_pair(Loc->first, false));
    return Locations;
  }

  // The instruction's own analyzable write, if any.
  if (auto Loc = getLocForWrite(I))
    Locations.push_back(std::make_pair(*Loc, false));

  // Optionally, locations written via call-argument "initializes" attributes.
  if (ConsiderInitializesAttr) {
    for (auto &MemLoc : getInitializesArgMemLoc(I)) {
      Locations.push_back(std::make_pair(MemLoc, true));
    }
  }
  return Locations;
}
1451
1452bool DSEState::isRemovable(Instruction *I) {
1453 assert(getLocForWrite(I) && "Must have analyzable write");
1454
1455 // Don't remove volatile/atomic stores.
1456 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1457 return SI->isUnordered();
1458
1459 if (auto *CB = dyn_cast<CallBase>(I)) {
1460 // Don't remove volatile memory intrinsics.
1461 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1462 return !MI->isVolatile();
1463
1464 // Never remove dead lifetime intrinsics, e.g. because they are followed
1465 // by a free.
1466 if (CB->isLifetimeStartOrEnd())
1467 return false;
1468
1469 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1470 !CB->isTerminator();
1471 }
1472
1473 return false;
1474}
1475
1476bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1477 Instruction *DefInst, Instruction *UseInst) {
1478 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1479 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1480 // MemoryDef.
1481 if (!UseInst->mayWriteToMemory())
1482 return false;
1483
1484 if (auto *CB = dyn_cast<CallBase>(UseInst))
1485 if (CB->onlyAccessesInaccessibleMemory())
1486 return false;
1487
1488 int64_t InstWriteOffset, DepWriteOffset;
1489 if (auto CC = getLocForWrite(UseInst))
1490 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1491 DepWriteOffset) == OW_Complete;
1492 return false;
1493}
1494
bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
                                      const MemoryLocation &DefLoc) {
  LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
                    << *Def->getMemoryInst()
                    << ") is at the end the function \n");
  // NOTE(review): the WorkList declaration appears to have been dropped in
  // this copy of the file — verify against upstream.
  SmallPtrSet<MemoryAccess *, 8> Visited;

  // Breadth-first walk over all MemorySSA uses reachable from Def; any
  // reachable read of DefLoc means the write is not dead at function end.
  pushMemUses(Def, WorkList, Visited);
  for (unsigned I = 0; I < WorkList.size(); I++) {
    if (WorkList.size() >= MemorySSAScanLimit) {
      LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
      return false;
    }

    MemoryAccess *UseAccess = WorkList[I];
    if (isa<MemoryPhi>(UseAccess)) {
      // AliasAnalysis does not account for loops. Limit elimination to
      // candidates for which we can guarantee they always store to the same
      // memory location.
      if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
        return false;

      pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
      continue;
    }
    // TODO: Checking for aliasing is expensive. Consider reducing the amount
    // of times this is called and/or caching it.
    Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
    if (isReadClobber(DefLoc, UseInst)) {
      LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
      return false;
    }

    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
      pushMemUses(UseDef, WorkList, Visited);
  }
  return true;
}
1534
1535std::optional<std::pair<MemoryLocation, bool>>
1536DSEState::getLocForTerminator(Instruction *I) const {
1537 if (auto *CB = dyn_cast<CallBase>(I)) {
1538 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1539 return {
1540 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1541 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1542 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1543 }
1544
1545 return std::nullopt;
1546}
1547
1548bool DSEState::isMemTerminatorInst(Instruction *I) const {
1549 auto *CB = dyn_cast<CallBase>(I);
1550 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1551 getFreedOperand(CB, &TLI) != nullptr);
1552}
1553
1554bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1555 Instruction *MaybeTerm) {
1556 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1557 getLocForTerminator(MaybeTerm);
1558
1559 if (!MaybeTermLoc)
1560 return false;
1561
1562 // If the terminator is a free-like call, all accesses to the underlying
1563 // object can be considered terminated.
1564 if (getUnderlyingObject(Loc.Ptr) !=
1565 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1566 return false;
1567
1568 auto TermLoc = MaybeTermLoc->first;
1569 if (MaybeTermLoc->second) {
1570 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1571 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1572 }
1573 int64_t InstWriteOffset = 0;
1574 int64_t DepWriteOffset = 0;
1575 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1576 DepWriteOffset) == OW_Complete;
1577}
1578
1579bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1580 Instruction *UseInst) {
1581 if (isNoopIntrinsic(UseInst))
1582 return false;
1583
1584 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1585 // treated as read clobber.
1586 if (auto SI = dyn_cast<StoreInst>(UseInst))
1587 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1588
1589 if (!UseInst->mayReadFromMemory())
1590 return false;
1591
1592 if (auto *CB = dyn_cast<CallBase>(UseInst))
1593 if (CB->onlyAccessesInaccessibleMemory())
1594 return false;
1595
1596 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1597}
1598
1599bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1600 const Instruction *KillingDef,
1601 const MemoryLocation &CurrentLoc) {
1602 // If the dependency is within the same block or loop level (being careful
1603 // of irreducible loops), we know that AA will return a valid result for the
1604 // memory dependency. (Both at the function level, outside of any loop,
1605 // would also be valid but we currently disable that to limit compile time).
1606 if (Current->getParent() == KillingDef->getParent())
1607 return true;
1608 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1609 if (!ContainsIrreducibleLoops && CurrentLI &&
1610 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1611 return true;
1612 // Otherwise check the memory location is invariant to any loops.
1613 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1614}
1615
1616bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1617 Ptr = Ptr->stripPointerCasts();
1618 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1619 if (GEP->hasAllConstantIndices())
1620 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1621
1622 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1623 return I->getParent()->isEntryBlock() ||
1624 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1625 }
1626 return true;
1627}
1628
1629std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1630 MemoryDef *KillingDef, MemoryAccess *StartAccess,
1631 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1632 unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1633 unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
1634 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1635 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1636 return std::nullopt;
1637 }
1638
1639 MemoryAccess *Current = StartAccess;
1640 Instruction *KillingI = KillingDef->getMemoryInst();
1641 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1642
1643 // Only optimize defining access of KillingDef when directly starting at its
1644 // defining access. The defining access also must only access KillingLoc. At
1645 // the moment we only support instructions with a single write location, so
1646 // it should be sufficient to disable optimizations for instructions that
1647 // also read from memory.
1648 bool CanOptimize = OptimizeMemorySSA &&
1649 KillingDef->getDefiningAccess() == StartAccess &&
1650 !KillingI->mayReadFromMemory();
1651
1652 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1653 std::optional<MemoryLocation> CurrentLoc;
1654 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1655 LLVM_DEBUG({
1656 dbgs() << " visiting " << *Current;
1657 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1658 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1659 << ")";
1660 dbgs() << "\n";
1661 });
1662
1663 // Reached TOP.
1664 if (MSSA.isLiveOnEntryDef(Current)) {
1665 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1666 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1667 // The first clobbering def is... none.
1668 KillingDef->setOptimized(Current);
1669 return std::nullopt;
1670 }
1671
1672 // Cost of a step. Accesses in the same block are more likely to be valid
1673 // candidates for elimination, hence consider them cheaper.
1674 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1677 if (WalkerStepLimit <= StepCost) {
1678 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1679 return std::nullopt;
1680 }
1681 WalkerStepLimit -= StepCost;
1682
1683 // Return for MemoryPhis. They cannot be eliminated directly and the
1684 // caller is responsible for traversing them.
1685 if (isa<MemoryPhi>(Current)) {
1686 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1687 return Current;
1688 }
1689
1690 // Below, check if CurrentDef is a valid candidate to be eliminated by
1691 // KillingDef. If it is not, check the next candidate.
1692 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1693 Instruction *CurrentI = CurrentDef->getMemoryInst();
1694
1695 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1696 CanOptimize = false;
1697 continue;
1698 }
1699
1700 // Before we try to remove anything, check for any extra throwing
1701 // instructions that block us from DSEing
1702 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1703 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1704 return std::nullopt;
1705 }
1706
1707 // Check for anything that looks like it will be a barrier to further
1708 // removal
1709 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1710 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1711 return std::nullopt;
1712 }
1713
1714 // If Current is known to be on path that reads DefLoc or is a read
1715 // clobber, bail out, as the path is not profitable. We skip this check
1716 // for intrinsic calls, because the code knows how to handle memcpy
1717 // intrinsics.
1718 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1719 return std::nullopt;
1720
1721 // Quick check if there are direct uses that are read-clobbers.
1722 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1723 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1724 return !MSSA.dominates(StartAccess, UseOrDef) &&
1725 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1726 return false;
1727 })) {
1728 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1729 return std::nullopt;
1730 }
1731
1732 // If Current does not have an analyzable write location or is not
1733 // removable, skip it.
1734 CurrentLoc = getLocForWrite(CurrentI);
1735 if (!CurrentLoc || !isRemovable(CurrentI)) {
1736 CanOptimize = false;
1737 continue;
1738 }
1739
1740 // AliasAnalysis does not account for loops. Limit elimination to
1741 // candidates for which we can guarantee they always store to the same
1742 // memory location and not located in different loops.
1743 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1744 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1745 CanOptimize = false;
1746 continue;
1747 }
1748
1749 if (IsMemTerm) {
1750 // If the killing def is a memory terminator (e.g. lifetime.end), check
1751 // the next candidate if the current Current does not write the same
1752 // underlying object as the terminator.
1753 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1754 CanOptimize = false;
1755 continue;
1756 }
1757 } else {
1758 int64_t KillingOffset = 0;
1759 int64_t DeadOffset = 0;
1760 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1761 KillingOffset, DeadOffset);
1762 if (CanOptimize) {
1763 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1764 // optimized access. Do not optimize if CurrentDef is already the
1765 // defining access of KillingDef.
1766 if (CurrentDef != KillingDef->getDefiningAccess() &&
1767 (OR == OW_Complete || OR == OW_MaybePartial))
1768 KillingDef->setOptimized(CurrentDef);
1769
1770 // Once a may-aliasing def is encountered do not set an optimized
1771 // access.
1772 if (OR != OW_None)
1773 CanOptimize = false;
1774 }
1775
1776 // If Current does not write to the same object as KillingDef, check
1777 // the next candidate.
1778 if (OR == OW_Unknown || OR == OW_None)
1779 continue;
1780 else if (OR == OW_MaybePartial) {
1781 // If KillingDef only partially overwrites Current, check the next
1782 // candidate if the partial step limit is exceeded. This aggressively
1783 // limits the number of candidates for partial store elimination,
1784 // which are less likely to be removable in the end.
1785 if (PartialLimit <= 1) {
1786 WalkerStepLimit -= 1;
1787 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1788 "next access\n");
1789 continue;
1790 }
1791 PartialLimit -= 1;
1792 }
1793 }
1794 break;
1795 };
1796
1797 // Accesses to objects accessible after the function returns can only be
1798 // eliminated if the access is dead along all paths to the exit. Collect
1799 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1800 // they cover all paths from MaybeDeadAccess to any function exit.
1801 SmallPtrSet<Instruction *, 16> KillingDefs;
1802 KillingDefs.insert(KillingDef->getMemoryInst());
1803 MemoryAccess *MaybeDeadAccess = Current;
1804 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1805 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1806 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1807 << *MaybeDeadI << ")\n");
1808
1810 SmallPtrSet<MemoryAccess *, 32> Visited;
1811 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1812
1813 // Check if DeadDef may be read.
1814 for (unsigned I = 0; I < WorkList.size(); I++) {
1815 MemoryAccess *UseAccess = WorkList[I];
1816
1817 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1818 // Bail out if the number of accesses to check exceeds the scan limit.
1819 if (ScanLimit < (WorkList.size() - I)) {
1820 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1821 return std::nullopt;
1822 }
1823 --ScanLimit;
1824 NumDomMemDefChecks++;
1825
1826 if (isa<MemoryPhi>(UseAccess)) {
1827 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1828 return DT.properlyDominates(KI->getParent(), UseAccess->getBlock());
1829 })) {
1830 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1831 continue;
1832 }
1833 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1834 pushMemUses(UseAccess, WorkList, Visited);
1835 continue;
1836 }
1837
1838 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1839 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1840
1841 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1842 return DT.dominates(KI, UseInst);
1843 })) {
1844 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1845 continue;
1846 }
1847
1848 // A memory terminator kills all preceeding MemoryDefs and all succeeding
1849 // MemoryAccesses. We do not have to check it's users.
1850 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1851 LLVM_DEBUG(
1852 dbgs()
1853 << " ... skipping, memterminator invalidates following accesses\n");
1854 continue;
1855 }
1856
1857 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1858 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1859 pushMemUses(UseAccess, WorkList, Visited);
1860 continue;
1861 }
1862
1863 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1864 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1865 return std::nullopt;
1866 }
1867
1868 // Uses which may read the original MemoryDef mean we cannot eliminate the
1869 // original MD. Stop walk.
1870 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1871 // the callee would be dominated by initializations, so it should be safe.
1872 bool IsKillingDefFromInitAttr = false;
1873 if (IsInitializesAttrMemLoc) {
1874 if (KillingI == UseInst &&
1875 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1876 IsKillingDefFromInitAttr = true;
1877 }
1878
1879 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1880 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1881 return std::nullopt;
1882 }
1883
1884 // If this worklist walks back to the original memory access (and the
1885 // pointer is not guarenteed loop invariant) then we cannot assume that a
1886 // store kills itself.
1887 if (MaybeDeadAccess == UseAccess &&
1888 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1889 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1890 return std::nullopt;
1891 }
1892 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1893 // if it reads the memory location.
1894 // TODO: It would probably be better to check for self-reads before
1895 // calling the function.
1896 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1897 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1898 continue;
1899 }
1900
1901 // Check all uses for MemoryDefs, except for defs completely overwriting
1902 // the original location. Otherwise we have to check uses of *all*
1903 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1904 // miss cases like the following
1905 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1906 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1907 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1908 // (The Use points to the *first* Def it may alias)
1909 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1910 // stores [0,1]
1911 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1912 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1913 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1914 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1915 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1916 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1917 KillingLoc.Size)) {
1919 << " ... found killing def " << *UseInst << "\n");
1920 KillingDefs.insert(UseInst);
1921 }
1922 } else {
1924 << " ... found preceeding def " << *UseInst << "\n");
1925 return std::nullopt;
1926 }
1927 } else
1928 pushMemUses(UseDef, WorkList, Visited);
1929 }
1930 }
1931
1932 // For accesses to locations visible after the function returns, make sure
1933 // that the location is dead (=overwritten) along all paths from
1934 // MaybeDeadAccess to the exit.
1935 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1936 KillingLoc.Size)) {
1937 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1938 for (Instruction *KD : KillingDefs)
1939 KillingBlocks.insert(KD->getParent());
1940 assert(!KillingBlocks.empty() &&
1941 "Expected at least a single killing block");
1942
1943 // Find the common post-dominator of all killing blocks.
1944 BasicBlock *CommonPred = *KillingBlocks.begin();
1945 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1946 if (!CommonPred)
1947 break;
1948 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1949 }
1950
1951 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1952 // there is a path from MaybeDeadAccess to an exit not going through a
1953 // killing block.
1954 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1955 if (!AnyUnreachableExit)
1956 return std::nullopt;
1957
1958 // Fall back to CFG scan starting at all non-unreachable roots if not
1959 // all paths to the exit go through CommonPred.
1960 CommonPred = nullptr;
1961 }
1962
1963 // If CommonPred itself is in the set of killing blocks, we're done.
1964 if (KillingBlocks.count(CommonPred))
1965 return {MaybeDeadAccess};
1966
1967 SetVector<BasicBlock *> WorkList;
1968 // If CommonPred is null, there are multiple exits from the function.
1969 // They all have to be added to the worklist.
1970 if (CommonPred)
1971 WorkList.insert(CommonPred);
1972 else
1973 for (BasicBlock *R : PDT.roots()) {
1974 if (!isa<UnreachableInst>(R->getTerminator()))
1975 WorkList.insert(R);
1976 }
1977
1978 NumCFGTries++;
1979 // Check if all paths starting from an exit node go through one of the
1980 // killing blocks before reaching MaybeDeadAccess.
1981 for (unsigned I = 0; I < WorkList.size(); I++) {
1982 NumCFGChecks++;
1983 BasicBlock *Current = WorkList[I];
1984 if (KillingBlocks.count(Current))
1985 continue;
1986 if (Current == MaybeDeadAccess->getBlock())
1987 return std::nullopt;
1988
1989 // MaybeDeadAccess is reachable from the entry, so we don't have to
1990 // explore unreachable blocks further.
1991 if (!DT.isReachableFromEntry(Current))
1992 continue;
1993
1994 WorkList.insert_range(predecessors(Current));
1995
1996 if (WorkList.size() >= MemorySSAPathCheckLimit)
1997 return std::nullopt;
1998 }
1999 NumCFGSuccess++;
2000 }
2001
2002 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
2003 // potentially dead.
2004 return {MaybeDeadAccess};
2005}
2006
// Delete \p SI and transitively every operand instruction that becomes
// trivially dead as a result, keeping MemorySSA and the pass's bookkeeping
// structures in sync. If \p Deleted is non-null, every removed MemoryDef is
// recorded in it so callers can drop stale worklist entries.
void DSEState::deleteDeadInstruction(Instruction *SI,
                                     SmallPtrSetImpl<MemoryAccess *> *Deleted) {
  MemorySSAUpdater Updater(&MSSA);
  SmallVector<Instruction *, 32> NowDeadInsts;
  NowDeadInsts.push_back(SI);
  // Pre-decrement so the unconditional ++ in the loop below does not count
  // the root instruction (callers bump their own specific counters for it).
  --NumFastOther;

  while (!NowDeadInsts.empty()) {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    ++NumFastOther;

    // Try to preserve debug information attached to the dead instruction.
    salvageDebugInfo(*DeadInst);
    salvageKnowledge(DeadInst);

    // Remove the Instruction from MSSA.
    MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
    bool IsMemDef = MA && isa<MemoryDef>(MA);
    if (MA) {
      if (IsMemDef) {
        auto *MD = cast<MemoryDef>(MA);
        // Mark the def so later traversals skip it.
        SkipStores.insert(MD);
        if (Deleted)
          Deleted->insert(MD);
        if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
          if (SI->getValueOperand()->getType()->isPointerTy()) {
            // Deleting a store of a pointer may un-capture its underlying
            // object; invalidate the cached visibility/capture state and
            // request another end-of-function DSE iteration.
            const Value *UO = getUnderlyingObject(SI->getValueOperand());
            if (CapturedBeforeReturn.erase(UO))
              ShouldIterateEndOfFunctionDSE = true;
            InvisibleToCallerAfterRet.erase(UO);
            InvisibleToCallerAfterRetBounded.erase(UO);
          }
        }
      }

      Updater.removeMemoryAccess(MA);
    }

    // Drop any partial-overlap interval tracking for this instruction.
    auto I = IOLs.find(DeadInst->getParent());
    if (I != IOLs.end())
      I->second.erase(DeadInst);
    // Remove its operands
    for (Use &O : DeadInst->operands())
      if (Instruction *OpI = dyn_cast<Instruction>(O)) {
        O.set(PoisonValue::get(O->getType()));
        if (isInstructionTriviallyDead(OpI, &TLI))
          NowDeadInsts.push_back(OpI);
      }

    EA.removeInstruction(DeadInst);
    // Remove memory defs directly if they don't produce results, but only
    // queue other dead instructions for later removal. They may have been
    // used as memory locations that have been cached by BatchAA. Removing
    // them here may lead to newly created instructions to be allocated at the
    // same address, yielding stale cache entries.
    if (IsMemDef && DeadInst->getType()->isVoidTy())
      DeadInst->eraseFromParent();
    else
      ToRemove.push_back(DeadInst);
  }
}
2068
2069bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2070 const Value *KillingUndObj) {
2071 // First see if we can ignore it by using the fact that KillingI is an
2072 // alloca/alloca like object that is not visible to the caller during
2073 // execution of the function.
2074 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
2075 return false;
2076
2077 if (KillingI->getParent() == DeadI->getParent())
2078 return ThrowingBlocks.count(KillingI->getParent());
2079 return !ThrowingBlocks.empty();
2080}
2081
2082bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2083 // If DeadI may throw it acts as a barrier, unless we are to an
2084 // alloca/alloca like object that does not escape.
2085 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
2086 return true;
2087
2088 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2089 // eliminate/reorder it.
2090 if (DeadI->isAtomic()) {
2091 if (auto *LI = dyn_cast<LoadInst>(DeadI))
2092 return isStrongerThanMonotonic(LI->getOrdering());
2093 if (auto *SI = dyn_cast<StoreInst>(DeadI))
2094 return isStrongerThanMonotonic(SI->getOrdering());
2095 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
2096 return isStrongerThanMonotonic(ARMW->getOrdering());
2097 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
2098 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
2099 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
2100 llvm_unreachable("other instructions should be skipped in MemorySSA");
2101 }
2102 return false;
2103}
2104
2105bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2106 bool MadeChange = false;
2107 LLVM_DEBUG(
2108 dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2109 do {
2110 ShouldIterateEndOfFunctionDSE = false;
2111 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
2112 if (SkipStores.contains(Def))
2113 continue;
2114
2115 Instruction *DefI = Def->getMemoryInst();
2116 auto DefLoc = getLocForWrite(DefI);
2117 if (!DefLoc || !isRemovable(DefI)) {
2118 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2119 "instruction not removable.\n");
2120 continue;
2121 }
2122
2123 // NOTE: Currently eliminating writes at the end of a function is
2124 // limited to MemoryDefs with a single underlying object, to save
2125 // compile-time. In practice it appears the case with multiple
2126 // underlying objects is very uncommon. If it turns out to be important,
2127 // we can use getUnderlyingObjects here instead.
2128 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
2129 if (!isInvisibleToCallerAfterRet(UO, DefLoc->Ptr, DefLoc->Size))
2130 continue;
2131
2132 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
2133 // See through pointer-to-pointer bitcasts
2134 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2135 "of the function\n");
2137 ++NumFastStores;
2138 MadeChange = true;
2139 }
2140 }
2141 } while (ShouldIterateEndOfFunctionDSE);
2142 return MadeChange;
2143}
2144
2145bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2146 Instruction *DefI = Def->getMemoryInst();
2147 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2148 if (!MemSet)
2149 // TODO: Could handle zero store to small allocation as well.
2150 return false;
2151 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2152 if (!StoredConstant || !StoredConstant->isNullValue())
2153 return false;
2154
2155 if (!isRemovable(DefI))
2156 // The memset might be volatile..
2157 return false;
2158
2159 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2160 F.hasFnAttribute(Attribute::SanitizeAddress) ||
2161 F.hasFnAttribute(Attribute::SanitizeHWAddress) || F.getName() == "calloc")
2162 return false;
2163 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2164 if (!Malloc)
2165 return false;
2166 auto *InnerCallee = Malloc->getCalledFunction();
2167 if (!InnerCallee)
2168 return false;
2169 LibFunc Func = NotLibFunc;
2170 StringRef ZeroedVariantName;
2171 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2172 Func != LibFunc_malloc) {
2173 Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2174 if (!Attr.isValid())
2175 return false;
2176 ZeroedVariantName = Attr.getValueAsString();
2177 if (ZeroedVariantName.empty())
2178 return false;
2179 }
2180
2181 // Gracefully handle malloc with unexpected memory attributes.
2182 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2183 if (!MallocDef)
2184 return false;
2185
2186 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2187 // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
2188 // of malloc block
2189 auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
2190 if (MallocBB == MemsetBB)
2191 return true;
2192 auto *Ptr = Memset->getArgOperand(0);
2193 auto *TI = MallocBB->getTerminator();
2194 BasicBlock *TrueBB, *FalseBB;
2195 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2196 m_Zero()),
2197 TrueBB, FalseBB)))
2198 return false;
2199 if (MemsetBB != FalseBB)
2200 return false;
2201 return true;
2202 };
2203
2204 if (Malloc->getOperand(0) != MemSet->getLength())
2205 return false;
2206 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2207 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2208 return false;
2209 IRBuilder<> IRB(Malloc);
2210 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2211 Value *Calloc = nullptr;
2212 if (!ZeroedVariantName.empty()) {
2213 LLVMContext &Ctx = Malloc->getContext();
2214 AttributeList Attrs = InnerCallee->getAttributes();
2215 AllocFnKind AllocKind =
2216 Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2217 AllocFnKind::Zeroed;
2218 AllocKind &= ~AllocFnKind::Uninitialized;
2219 Attrs =
2220 Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2221 .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2222 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2223 ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2224 cast<Function>(ZeroedVariant.getCallee())
2225 ->setCallingConv(Malloc->getCallingConv());
2227 Args.append(Malloc->arg_begin(), Malloc->arg_end());
2228 CallInst *CI = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2229 CI->setCallingConv(Malloc->getCallingConv());
2230 Calloc = CI;
2231 } else {
2232 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2233 Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2234 IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2235 }
2236 if (!Calloc)
2237 return false;
2238
2239 MemorySSAUpdater Updater(&MSSA);
2240 auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc),
2241 nullptr, MallocDef);
2242 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2243 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2244 Malloc->replaceAllUsesWith(Calloc);
2246 return true;
2247}
2248
// Return true if a conditional branch dominating \p Def already establishes
// that the stored value is the value currently in memory, making the store a
// no-op. Matches the pattern
//   if (*StorePtr == StoreVal)  (or !=, taken the other way)
//     ... store StoreVal, StorePtr ...
// and verifies via MemorySSA that nothing clobbers the location between the
// compared load and the store.
bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
  auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
  BasicBlock *StoreBB = StoreI->getParent();
  Value *StorePtr = StoreI->getPointerOperand();
  Value *StoreVal = StoreI->getValueOperand();

  // The controlling branch must sit in the immediate dominator of the store.
  DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
  if (!IDom)
    return false;

  auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  // In case both blocks are the same, it is not possible to determine
  // if optimization is possible. (We would not want to optimize a store
  // in the FalseBB if condition is true and vice versa.)
  if (BI->getSuccessor(0) == BI->getSuccessor(1))
    return false;

  // Match an equality compare of a load from StorePtr against StoreVal;
  // ICmpL captures the load instruction for the MemorySSA query below.
  Instruction *ICmpL;
  CmpPredicate Pred;
  if (!match(BI->getCondition(),
             m_c_ICmp(Pred,
                      m_CombineAnd(m_Load(m_Specific(StorePtr)),
                                   m_Instruction(ICmpL)),
                      m_Specific(StoreVal))) ||
      !ICmpInst::isEquality(Pred))
    return false;

  // Ensure the replacement is allowed when comparing pointers, as
  // the equality compares addresses only, not pointers' provenance.
  if (StoreVal->getType()->isPointerTy() &&
      !canReplacePointersIfEqual(StoreVal, ICmpL, DL))
    return false;

  // In case the else blocks also branches to the if block or the other way
  // around it is not possible to determine if the optimization is possible.
  if (Pred == ICmpInst::ICMP_EQ &&
      !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
                    StoreBB))
    return false;

  if (Pred == ICmpInst::ICMP_NE &&
      !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
                    StoreBB))
    return false;

  // The compared load must observe the same memory state as the store: the
  // store's clobbering access must dominate (i.e. not be after) the load.
  MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
  MemoryAccess *ClobAcc =
      MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);

  return MSSA.dominates(ClobAcc, LoadAcc);
}
2303
// Return true if \p Def (a store or memset) writes a value that is already
// present in memory and can therefore be removed as a no-op. Handles three
// cases: (1) storing an allocation's known initial value with no intervening
// clobber, (2) a dominating condition proving the value is already stored
// (see dominatingConditionImpliesValue), and (3) storing back a value just
// loaded from the same pointer with no intervening write.
bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
  Instruction *DefI = Def->getMemoryInst();
  StoreInst *Store = dyn_cast<StoreInst>(DefI);
  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
  Constant *StoredConstant = nullptr;
  if (Store)
    StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
  else if (MemSet)
    StoredConstant = dyn_cast<Constant>(MemSet->getValue());
  else
    return false;

  if (!isRemovable(DefI))
    return false;

  // Case (1): storing the allocation's initial value (e.g. zero into
  // calloc'd memory).
  if (StoredConstant) {
    Constant *InitC =
        getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
    // If the clobbering access is LiveOnEntry, no instructions between them
    // can modify the memory location.
    if (InitC && InitC == StoredConstant)
      return MSSA.isLiveOnEntryDef(
          MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
  }

  // The remaining cases only apply to plain stores.
  if (!Store)
    return false;

  // Case (2): a dominating branch already guarantees the stored value.
  if (dominatingConditionImpliesValue(Def))
    return true;

  // Case (3): store of a load from the same address (load/store round-trip).
  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
    if (LoadI->getPointerOperand() == Store->getOperand(1)) {
      // Get the defining access for the load.
      auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
      // Fast path: the defining accesses are the same.
      if (LoadAccess == Def->getDefiningAccess())
        return true;

      // Look through phi accesses. Recursively scan all phi accesses by
      // adding them to a worklist. Bail when we run into a memory def that
      // does not match LoadAccess.
      SetVector<MemoryAccess *> ToCheck;
      MemoryAccess *Current =
          MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
      // We don't want to bail when we run into the store memory def. But,
      // the phi access may point to it. So, pretend like we've already
      // checked it.
      ToCheck.insert(Def);
      ToCheck.insert(Current);
      // Start at current (1) to simulate already having checked Def.
      for (unsigned I = 1; I < ToCheck.size(); ++I) {
        Current = ToCheck[I];
        if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
          // Check all the operands.
          for (auto &Use : PhiAccess->incoming_values())
            ToCheck.insert(cast<MemoryAccess>(&Use));
          continue;
        }

        // If we found a memory def, bail. This happens when we have an
        // unrelated write in between an otherwise noop store.
        assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
        // TODO: Skip no alias MemoryDefs that have no aliasing reads.
        // We are searching for the definition of the store's destination.
        // So, if that is the same definition as the load, then this is a
        // noop. Otherwise, fail.
        if (LoadAccess != Current)
          return false;
      }
      return true;
    }
  }

  return false;
}
2380
2381bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2382 bool Changed = false;
2383 for (auto OI : IOL) {
2384 Instruction *DeadI = OI.first;
2385 MemoryLocation Loc = *getLocForWrite(DeadI);
2386 assert(isRemovable(DeadI) && "Expect only removable instruction");
2387
2388 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2389 int64_t DeadStart = 0;
2390 uint64_t DeadSize = Loc.Size.getValue();
2391 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2392 OverlapIntervalsTy &IntervalMap = OI.second;
2393 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2394 if (IntervalMap.empty())
2395 continue;
2396 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2397 }
2398 return Changed;
2399}
2400
// Remove MemoryDefs that write exactly the value the preceding (optimized or
// defining) MemoryDef already wrote to the same location — either an
// identical instruction, or a store whose bytes match an earlier covering
// memset. Returns true if any store was deleted.
bool DSEState::eliminateRedundantStoresOfExistingValues() {
  bool MadeChange = false;
  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                       "already existing value\n");
  for (auto *Def : MemDefs) {
    if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
      continue;

    Instruction *DefInst = Def->getMemoryInst();
    auto MaybeDefLoc = getLocForWrite(DefInst);
    if (!MaybeDefLoc || !isRemovable(DefInst))
      continue;

    MemoryDef *UpperDef;
    // To conserve compile-time, we avoid walking to the next clobbering def.
    // Instead, we just try to get the optimized access, if it exists. DSE
    // will try to optimize defs during the earlier traversal.
    if (Def->isOptimized())
      UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
    else
      UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
    if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
      continue;

    Instruction *UpperInst = UpperDef->getMemoryInst();
    auto IsRedundantStore = [&]() {
      // We don't care about differences in call attributes here.
      if (DefInst->isIdenticalToWhenDefined(UpperInst,
                                            /*IntersectAttrs=*/true))
        return true;
      // A store is also redundant if an earlier memset already filled the
      // stored range with the same byte pattern and completely covers it.
      if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
        if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
          // MemSetInst must have a write location.
          auto UpperLoc = getLocForWrite(UpperInst);
          if (!UpperLoc)
            return false;
          int64_t InstWriteOffset = 0;
          int64_t DepWriteOffset = 0;
          auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
                                InstWriteOffset, DepWriteOffset);
          Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
          return StoredByte && StoredByte == MemSetI->getOperand(1) &&
                 OR == OW_Complete;
        }
      }
      return false;
    };

    // Even a redundant store must stay if something may read the location
    // between the two writes.
    if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
      continue;
    LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
                      << '\n');
    deleteDeadInstruction(DefInst);
    NumRedundantStores++;
    MadeChange = true;
  }
  return MadeChange;
}
2459
2461DSEState::getInitializesArgMemLoc(const Instruction *I) {
2462 const CallBase *CB = dyn_cast<CallBase>(I);
2463 if (!CB)
2464 return {};
2465
2466 // Collect aliasing arguments and their initializes ranges.
2467 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2468 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2469 Value *CurArg = CB->getArgOperand(Idx);
2470 if (!CurArg->getType()->isPointerTy())
2471 continue;
2472
2473 ConstantRangeList Inits;
2474 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2475 // initializes on byval arguments refers to the callee copy, not the
2476 // original memory the caller passed in.
2477 if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2478 Inits = InitializesAttr.getValueAsConstantRangeList();
2479
2480 // Check whether "CurArg" could alias with global variables. We require
2481 // either it's function local and isn't captured before or the "CB" only
2482 // accesses arg or inaccessible mem.
2483 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2484 !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2485 Inits = ConstantRangeList();
2486
2487 // We don't perform incorrect DSE on unwind edges in the current function,
2488 // and use the "initializes" attribute to kill dead stores if:
2489 // - The call does not throw exceptions, "CB->doesNotThrow()".
2490 // - Or the callee parameter has "dead_on_unwind" attribute.
2491 // - Or the argument is invisible to caller on unwind, and there are no
2492 // unwind edges from this call in the current function (e.g. `CallInst`).
2493 bool IsDeadOrInvisibleOnUnwind =
2494 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2495 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2496 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2497 bool FoundAliasing = false;
2498 for (auto &[Arg, AliasList] : Arguments) {
2499 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2501 if (AAR == AliasResult::NoAlias) {
2502 continue;
2503 } else if (AAR == AliasResult::MustAlias) {
2504 FoundAliasing = true;
2505 AliasList.push_back(InitInfo);
2506 } else {
2507 // For PartialAlias and MayAlias, there is an offset or may be an
2508 // unknown offset between the arguments and we insert an empty init
2509 // range to discard the entire initializes info while intersecting.
2510 FoundAliasing = true;
2511 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2512 ConstantRangeList()});
2513 }
2514 }
2515 if (!FoundAliasing)
2516 Arguments[CurArg] = {InitInfo};
2517 }
2518
2520 for (const auto &[_, Args] : Arguments) {
2521 auto IntersectedRanges =
2523 if (IntersectedRanges.empty())
2524 continue;
2525
2526 for (const auto &Arg : Args) {
2527 for (const auto &Range : IntersectedRanges) {
2528 int64_t Start = Range.getLower().getSExtValue();
2529 int64_t End = Range.getUpper().getSExtValue();
2530 // For now, we only handle locations starting at offset 0.
2531 if (Start == 0)
2532 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2533 LocationSize::precise(End - Start),
2534 CB->getAAMetadata()));
2535 }
2536 }
2537 }
2538 return Locations;
2539}
2540
2541std::pair<bool, bool>
2542DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2543 bool Changed = false;
2544 bool DeletedKillingLoc = false;
2545 unsigned ScanLimit = MemorySSAScanLimit;
2546 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2547 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2548 // Worklist of MemoryAccesses that may be killed by
2549 // "KillingLocWrapper.MemDef".
2550 SmallSetVector<MemoryAccess *, 8> ToCheck;
2551 // Track MemoryAccesses that have been deleted in the loop below, so we can
2552 // skip them. Don't use SkipStores for this, which may contain reused
2553 // MemoryAccess addresses.
2554 SmallPtrSet<MemoryAccess *, 8> Deleted;
2555 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2556 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2557
2558 // Check if MemoryAccesses in the worklist are killed by
2559 // "KillingLocWrapper.MemDef".
2560 for (unsigned I = 0; I < ToCheck.size(); I++) {
2561 MemoryAccess *Current = ToCheck[I];
2562 if (Deleted.contains(Current))
2563 continue;
2564 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2565 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2566 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2567 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2568 KillingLocWrapper.DefByInitializesAttr);
2569
2570 if (!MaybeDeadAccess) {
2571 LLVM_DEBUG(dbgs() << " finished walk\n");
2572 continue;
2573 }
2574 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2575 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2576 if (isa<MemoryPhi>(DeadAccess)) {
2577 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2578 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2579 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2580 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2581 BasicBlock *PhiBlock = DeadAccess->getBlock();
2582
2583 // We only consider incoming MemoryAccesses that come before the
2584 // MemoryPhi. Otherwise we could discover candidates that do not
2585 // strictly dominate our starting def.
2586 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2587 ToCheck.insert(IncomingAccess);
2588 }
2589 continue;
2590 }
2591 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2592 // It would incorrectly consider a call instruction as redundant store
2593 // and remove this call instruction.
2594 // TODO: this conflates the existence of a MemoryLocation with being able
2595 // to delete the instruction. Fix isRemovable() to consider calls with
2596 // side effects that cannot be removed, e.g. calls with the initializes
2597 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2598 MemoryDefWrapper DeadDefWrapper(
2599 cast<MemoryDef>(DeadAccess),
2600 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2601 /*ConsiderInitializesAttr=*/false));
2602 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2603 MemoryLocationWrapper &DeadLocWrapper =
2604 DeadDefWrapper.DefinedLocations.front();
2605 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2606 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2607 NumGetDomMemoryDefPassed++;
2608
2609 if (!DebugCounter::shouldExecute(MemorySSACounter))
2610 continue;
2611 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2612 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2613 continue;
2614 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2615 << *DeadLocWrapper.DefInst << "\n KILLER: "
2616 << *KillingLocWrapper.DefInst << '\n');
2617 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2618 ++NumFastStores;
2619 Changed = true;
2620 } else {
2621 // Check if DeadI overwrites KillingI.
2622 int64_t KillingOffset = 0;
2623 int64_t DeadOffset = 0;
2624 OverwriteResult OR =
2625 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2626 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2627 KillingOffset, DeadOffset);
2628 if (OR == OW_MaybePartial) {
2629 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2630 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2631 KillingOffset, DeadOffset,
2632 DeadLocWrapper.DefInst, IOL);
2633 }
2634 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2635 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2636 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2637 // We are re-using tryToMergePartialOverlappingStores, which requires
2638 // DeadSI to dominate KillingSI.
2639 // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2640 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2641 if (Constant *Merged = tryToMergePartialOverlappingStores(
2642 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2643 &DT)) {
2644
2645 // Update stored value of earlier store to merged constant.
2646 DeadSI->setOperand(0, Merged);
2647 ++NumModifiedStores;
2648 Changed = true;
2649 DeletedKillingLoc = true;
2650
2651 // Remove killing store and remove any outstanding overlap
2652 // intervals for the updated store.
2653 deleteDeadInstruction(KillingSI, &Deleted);
2654 auto I = IOLs.find(DeadSI->getParent());
2655 if (I != IOLs.end())
2656 I->second.erase(DeadSI);
2657 break;
2658 }
2659 }
2660 }
2661 if (OR == OW_Complete) {
2662 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2663 << *DeadLocWrapper.DefInst << "\n KILLER: "
2664 << *KillingLocWrapper.DefInst << '\n');
2665 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2666 ++NumFastStores;
2667 Changed = true;
2668 }
2669 }
2670 }
2671
2672 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2673 "SkipStores and Deleted out of sync?");
2674
2675 return {Changed, DeletedKillingLoc};
2676}
2677
2678bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2679 if (KillingDefWrapper.DefinedLocations.empty()) {
2680 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2681 << *KillingDefWrapper.DefInst << "\n");
2682 return false;
2683 }
2684
2685 bool MadeChange = false;
2686 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2687 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2688 << *KillingLocWrapper.MemDef << " ("
2689 << *KillingLocWrapper.DefInst << ")\n");
2690 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2691 MadeChange |= Changed;
2692
2693 // Check if the store is a no-op.
2694 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2695 KillingLocWrapper.UnderlyingObject)) {
2696 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2697 << *KillingLocWrapper.DefInst << '\n');
2698 deleteDeadInstruction(KillingLocWrapper.DefInst);
2699 NumRedundantStores++;
2700 MadeChange = true;
2701 continue;
2702 }
2703 // Can we form a calloc from a memset/malloc pair?
2704 if (!DeletedKillingLoc &&
2705 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2706 KillingLocWrapper.UnderlyingObject)) {
2707 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2708 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2709 deleteDeadInstruction(KillingLocWrapper.DefInst);
2710 MadeChange = true;
2711 continue;
2712 }
2713 }
2714 return MadeChange;
2715}
2716
2719 const TargetLibraryInfo &TLI,
2720 const LoopInfo &LI) {
2721 bool MadeChange = false;
2722 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2723 // For each store:
2724 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2725 MemoryDef *KillingDef = State.MemDefs[I];
2726 if (State.SkipStores.count(KillingDef))
2727 continue;
2728
2729 MemoryDefWrapper KillingDefWrapper(
2730 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2732 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2733 }
2734
2736 for (auto &KV : State.IOLs)
2737 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2738
2739 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2740 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2741
2742 while (!State.ToRemove.empty()) {
2743 Instruction *DeadInst = State.ToRemove.pop_back_val();
2744 DeadInst->eraseFromParent();
2745 }
2746
2747 return MadeChange;
2748}
2749
2750//===----------------------------------------------------------------------===//
2751// DSE Pass
2752//===----------------------------------------------------------------------===//
2757 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2759 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2760
2761 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2762
2763#ifdef LLVM_ENABLE_STATS
2765 for (auto &I : instructions(F))
2766 NumRemainingStores += isa<StoreInst>(&I);
2767#endif
2768
2769 if (!Changed)
2770 return PreservedAnalyses::all();
2771
2775 PA.preserve<LoopAnalysis>();
2776 return PA;
2777}
2778
2779namespace {
2780
2781/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2782class DSELegacyPass : public FunctionPass {
2783public:
2784 static char ID; // Pass identification, replacement for typeid
2785
2786 DSELegacyPass() : FunctionPass(ID) {
2788 }
2789
2790 bool runOnFunction(Function &F) override {
2791 if (skipFunction(F))
2792 return false;
2793
2794 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2795 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2796 const TargetLibraryInfo &TLI =
2797 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2798 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2799 PostDominatorTree &PDT =
2800 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2801 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2802
2803 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2804
2805#ifdef LLVM_ENABLE_STATS
2807 for (auto &I : instructions(F))
2808 NumRemainingStores += isa<StoreInst>(&I);
2809#endif
2810
2811 return Changed;
2812 }
2813
2814 void getAnalysisUsage(AnalysisUsage &AU) const override {
2815 AU.setPreservesCFG();
2816 AU.addRequired<AAResultsWrapperPass>();
2817 AU.addRequired<TargetLibraryInfoWrapperPass>();
2818 AU.addPreserved<GlobalsAAWrapperPass>();
2819 AU.addRequired<DominatorTreeWrapperPass>();
2820 AU.addPreserved<DominatorTreeWrapperPass>();
2821 AU.addRequired<PostDominatorTreeWrapperPass>();
2822 AU.addRequired<MemorySSAWrapperPass>();
2823 AU.addPreserved<PostDominatorTreeWrapperPass>();
2824 AU.addPreserved<MemorySSAWrapperPass>();
2825 AU.addRequired<LoopInfoWrapperPass>();
2826 AU.addPreserved<LoopInfoWrapperPass>();
2827 AU.addRequired<AssumptionCacheTracker>();
2828 }
2829};
2830
2831} // end anonymous namespace
2832
2833char DSELegacyPass::ID = 0;
2834
2835INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2836 false)
2846INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2847 false)
2848
2850 return new DSELegacyPass();
2851}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
MapVector< Instruction *, OverlapIntervalsTy > InstOverlapIntervalsTy
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller)
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static bool isNoopIntrinsic(Instruction *I)
static ConstantRangeList getIntersectedInitRangeList(ArrayRef< ArgumentInitInfo > Args, bool CallHasNoUnwindAttr)
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static void pushMemUses(MemoryAccess *Acc, SmallVectorImpl< MemoryAccess * > &WorkList, SmallPtrSetImpl< MemoryAccess * > &Visited)
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, EarliestEscapeAnalysis &EA)
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static bool hasInitializesAttr(Instruction *I)
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const LoopInfo &LI)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
early cse Early CSE w MemorySSA
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An immutable pass that tracks lazily created AssumptionCache objects.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
static DIAssignID * getDistinct(LLVMContext &Context)
DbgVariableFragmentInfo FragmentInfo
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt) override
Return how Object may be captured before instruction I, considering only provenance captures.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
const_iterator begin() const
bool empty() const
empty - Return true when no intervals are mapped.
const_iterator end() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool hasValue() const
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Value * getLength() const
Value * getValue() const
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1039
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:979
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
iterator_range< use_iterator > uses()
Definition Value.h:381
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
AllocFnKind
Definition Attributes.h:53
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1726
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< po_iterator< T > > post_order(const T &G)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:406
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value To if they are deeme...
Definition Loads.cpp:879
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition Alignment.h:186
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:359
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:355
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.