LLVM 23.0.0git
SROA.cpp
Go to the documentation of this file.
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
24
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/Metadata.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
76#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
88#include <algorithm>
89#include <cassert>
90#include <cstddef>
91#include <cstdint>
92#include <cstring>
93#include <iterator>
94#include <queue>
95#include <string>
96#include <tuple>
97#include <utility>
98#include <variant>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "sroa"
104
105STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
106STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
107STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
108STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
109STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
110STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
111STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
112STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
113STATISTIC(NumLoadsPredicated,
114 "Number of loads rewritten into predicated loads to allow promotion");
116 NumStoresPredicated,
117 "Number of stores rewritten into predicated loads to allow promotion");
118STATISTIC(NumDeleted, "Number of instructions deleted");
119STATISTIC(NumVectorized, "Number of vectorized aggregates");
120
121namespace llvm {
122/// Disable running mem2reg during SROA in order to test or debug SROA.
123static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
124 cl::Hidden);
126} // namespace llvm
127
128namespace {
129
130class AllocaSliceRewriter;
131class AllocaSlices;
132class Partition;
133
134class SelectHandSpeculativity {
135 unsigned char Storage = 0; // None are speculatable by default.
136 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
137 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
138public:
139 SelectHandSpeculativity() = default;
140 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
141 bool isSpeculatable(bool isTrueVal) const;
142 bool areAllSpeculatable() const;
143 bool areAnySpeculatable() const;
144 bool areNoneSpeculatable() const;
145 // For interop as int half of PointerIntPair.
146 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
147 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
148};
149static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
150
151using PossiblySpeculatableLoad =
153using UnspeculatableStore = StoreInst *;
154using RewriteableMemOp =
155 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
156using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
157
158/// An optimization pass providing Scalar Replacement of Aggregates.
159///
160/// This pass takes allocations which can be completely analyzed (that is, they
161/// don't escape) and tries to turn them into scalar SSA values. There are
162/// a few steps to this process.
163///
164/// 1) It takes allocations of aggregates and analyzes the ways in which they
165/// are used to try to split them into smaller allocations, ideally of
166/// a single scalar data type. It will split up memcpy and memset accesses
167/// as necessary and try to isolate individual scalar accesses.
168/// 2) It will transform accesses into forms which are suitable for SSA value
169/// promotion. This can be replacing a memset with a scalar store of an
170/// integer value, or it can involve speculating operations on a PHI or
171/// select to be a PHI or select of the results.
172/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
173/// onto insert and extract operations on a vector value, and convert them to
174/// this form. By doing so, it will enable promotion of vector aggregates to
175/// SSA vector values.
176class SROA {
177 LLVMContext *const C;
178 DomTreeUpdater *const DTU;
179 AssumptionCache *const AC;
180 const bool PreserveCFG;
181 const bool AggregateToVector;
182
183 /// Worklist of alloca instructions to simplify.
184 ///
185 /// Each alloca in the function is added to this. Each new alloca formed gets
186 /// added to it as well to recursively simplify unless that alloca can be
187 /// directly promoted. Finally, each time we rewrite a use of an alloca other
188 /// the one being actively rewritten, we add it back onto the list if not
189 /// already present to ensure it is re-visited.
190 SmallSetVector<AllocaInst *, 16> Worklist;
191
192 /// A collection of instructions to delete.
193 /// We try to batch deletions to simplify code and make things a bit more
194 /// efficient. We also make sure there is no dangling pointers.
195 SmallVector<WeakVH, 8> DeadInsts;
196
197 /// Post-promotion worklist.
198 ///
199 /// Sometimes we discover an alloca which has a high probability of becoming
200 /// viable for SROA after a round of promotion takes place. In those cases,
201 /// the alloca is enqueued here for re-processing.
202 ///
203 /// Note that we have to be very careful to clear allocas out of this list in
204 /// the event they are deleted.
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
206
207 /// A collection of alloca instructions we can directly promote.
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
210 PromotableAllocas;
211
212 /// A worklist of PHIs to speculate prior to promoting allocas.
213 ///
214 /// All of these PHIs have been checked for the safety of speculation and by
215 /// being speculated will allow promoting allocas currently in the promotable
216 /// queue.
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
218
219 /// A worklist of select instructions to rewrite prior to promoting
220 /// allocas.
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
222
223 /// Select instructions that use an alloca and are subsequently loaded can be
224 /// rewritten to load both input pointers and then select between the result,
225 /// allowing the load of the alloca to be promoted.
226 /// From this:
227 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
228 /// %V = load <type>, ptr %P2
229 /// to:
230 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
231 /// %V2 = load <type>, ptr %Other
232 /// %V = select i1 %cond, <type> %V1, <type> %V2
233 ///
234 /// We can do this to a select if its only uses are loads
235 /// and if either the operand to the select can be loaded unconditionally,
236 /// or if we are allowed to perform CFG modifications.
237 /// If found an intervening bitcast with a single use of the load,
238 /// allow the promotion.
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
241
242public:
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 SROAOptions Options)
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(Options.CFG == SROAOptions::PreserveCFG),
247 AggregateToVector(Options.AggregateToVector) {}
248
249 /// Main run method used by both the SROAPass and by the legacy pass.
250 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
251
252private:
253 friend class AllocaSliceRewriter;
254
255 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
256 std::pair<AllocaInst *, uint64_t>
257 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
258 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
259 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
260 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
261 void clobberUse(Use &U);
262 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
263 bool promoteAllocas();
264};
265
266} // end anonymous namespace
267
268/// Calculate the fragment of a variable to use when slicing a store
269/// based on the slice dimensions, existing fragment, and base storage
270/// fragment.
271/// Results:
272/// UseFrag - Use Target as the new fragment.
273/// UseNoFrag - The new slice already covers the whole variable.
274/// Skip - The new alloca slice doesn't include this variable.
275/// FIXME: Can we use calculateFragmentIntersect instead?
276namespace {
277enum FragCalcResult { UseFrag, UseNoFrag, Skip };
278}
279static FragCalcResult
281 uint64_t NewStorageSliceOffsetInBits,
282 uint64_t NewStorageSliceSizeInBits,
283 std::optional<DIExpression::FragmentInfo> StorageFragment,
284 std::optional<DIExpression::FragmentInfo> CurrentFragment,
286 // If the base storage describes part of the variable apply the offset and
287 // the size constraint.
288 if (StorageFragment) {
289 Target.SizeInBits =
290 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
291 Target.OffsetInBits =
292 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
293 } else {
294 Target.SizeInBits = NewStorageSliceSizeInBits;
295 Target.OffsetInBits = NewStorageSliceOffsetInBits;
296 }
297
298 // If this slice extracts the entirety of an independent variable from a
299 // larger alloca, do not produce a fragment expression, as the variable is
300 // not fragmented.
301 if (!CurrentFragment) {
302 if (auto Size = Variable->getSizeInBits()) {
303 // Treat the current fragment as covering the whole variable.
304 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
305 if (Target == CurrentFragment)
306 return UseNoFrag;
307 }
308 }
309
310 // No additional work to do if there isn't a fragment already, or there is
311 // but it already exactly describes the new assignment.
312 if (!CurrentFragment || *CurrentFragment == Target)
313 return UseFrag;
314
315 // Reject the target fragment if it doesn't fit wholly within the current
316 // fragment. TODO: We could instead chop up the target to fit in the case of
317 // a partial overlap.
318 if (Target.startInBits() < CurrentFragment->startInBits() ||
319 Target.endInBits() > CurrentFragment->endInBits())
320 return Skip;
321
322 // Target fits within the current fragment, return it.
323 return UseFrag;
324}
325
327 return DebugVariable(DVR->getVariable(), std::nullopt,
328 DVR->getDebugLoc().getInlinedAt());
329}
330
331/// Find linked dbg.assign and generate a new one with the correct
332/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
333/// value component is copied from the old dbg.assign to the new.
334/// \param OldAlloca Alloca for the variable before splitting.
335/// \param IsSplit True if the store (not necessarily alloca)
336/// is being split.
337/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
338/// \param SliceSizeInBits New number of bits being written to.
339/// \param OldInst Instruction that is being split.
340/// \param Inst New instruction performing this part of the
341/// split store.
342/// \param Dest Store destination.
343/// \param Value Stored value.
344/// \param DL Datalayout.
345static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
346 uint64_t OldAllocaOffsetInBits,
347 uint64_t SliceSizeInBits, Instruction *OldInst,
348 Instruction *Inst, Value *Dest, Value *Value,
349 const DataLayout &DL) {
350 // If we want allocas to be migrated using this helper then we need to ensure
351 // that the BaseFragments map code still works. A simple solution would be
352 // to choose to always clone alloca dbg_assigns (rather than sometimes
353 // "stealing" them).
354 assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
355
356 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
357 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
358 if (DVRAssignMarkerRange.empty())
359 return;
360
361 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
362 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
363 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
364 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
365 << "\n");
366 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
367 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
368 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
369 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
370 if (Value)
371 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
372
373 /// Map of aggregate variables to their fragment associated with OldAlloca.
375 BaseFragments;
376 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
377 BaseFragments[getAggregateVariable(DVR)] =
378 DVR->getExpression()->getFragmentInfo();
379
380 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
381 // one). It shouldn't already have one: assert this assumption.
382 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
383 DIAssignID *NewID = nullptr;
384 auto &Ctx = Inst->getContext();
385 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
386 assert(OldAlloca->isStaticAlloca());
387
388 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
389 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
390 << "\n");
391 auto *Expr = DbgAssign->getExpression();
392 bool SetKillLocation = false;
393
394 if (IsSplit) {
395 std::optional<DIExpression::FragmentInfo> BaseFragment;
396 {
397 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
398 if (R == BaseFragments.end())
399 return;
400 BaseFragment = R->second;
401 }
402 std::optional<DIExpression::FragmentInfo> CurrentFragment =
403 Expr->getFragmentInfo();
404 DIExpression::FragmentInfo NewFragment;
405 FragCalcResult Result = calculateFragment(
406 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
407 BaseFragment, CurrentFragment, NewFragment);
408
409 if (Result == Skip)
410 return;
411 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
412 if (CurrentFragment) {
413 // Rewrite NewFragment to be relative to the existing one (this is
414 // what createFragmentExpression wants). CalculateFragment has
415 // already resolved the size for us. FIXME: Should it return the
416 // relative fragment too?
417 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
418 }
419 // Add the new fragment info to the existing expression if possible.
421 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
422 Expr = *E;
423 } else {
424 // Otherwise, add the new fragment info to an empty expression and
425 // discard the value component of this dbg.assign as the value cannot
426 // be computed with the new fragment.
428 DIExpression::get(Expr->getContext(), {}),
429 NewFragment.OffsetInBits, NewFragment.SizeInBits);
430 SetKillLocation = true;
431 }
432 }
433 }
434
435 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
436 if (!NewID) {
437 NewID = DIAssignID::getDistinct(Ctx);
438 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
439 }
440
441 DbgVariableRecord *NewAssign;
442 if (IsSplit) {
443 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
445 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
446 Dest, DIExpression::get(Expr->getContext(), {}),
447 DbgAssign->getDebugLoc())));
448 } else {
449 // The store is not split, simply steal the existing dbg_assign.
450 NewAssign = DbgAssign;
451 NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
452 NewAssign->setAddress(Dest);
453 if (Value)
454 NewAssign->replaceVariableLocationOp(0u, Value);
455 assert(Expr == NewAssign->getExpression());
456 }
457
458 // If we've updated the value but the original dbg.assign has an arglist
459 // then kill it now - we can't use the requested new value.
460 // We can't replace the DIArgList with the new value as it'd leave
461 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
462 // an arglist). And we can't keep the DIArgList in case the linked store
463 // is being split - in which case the DIArgList + expression may no longer
464 // be computing the correct value.
465 // This should be a very rare situation as it requires the value being
466 // stored to differ from the dbg.assign (i.e., the value has been
467 // represented differently in the debug intrinsic for some reason).
468 SetKillLocation |=
469 Value && (DbgAssign->hasArgList() ||
470 !DbgAssign->getExpression()->isSingleLocationExpression());
471 if (SetKillLocation)
472 NewAssign->setKillLocation();
473
474 // We could use more precision here at the cost of some additional (code)
475 // complexity - if the original dbg.assign was adjacent to its store, we
476 // could position this new dbg.assign adjacent to its store rather than the
477 // old dbg.assgn. That would result in interleaved dbg.assigns rather than
478 // what we get now:
479 // split store !1
480 // split store !2
481 // dbg.assign !1
482 // dbg.assign !2
483 // This (current behaviour) results results in debug assignments being
484 // noted as slightly offset (in code) from the store. In practice this
485 // should have little effect on the debugging experience due to the fact
486 // that all the split stores should get the same line number.
487 if (NewAssign != DbgAssign) {
488 NewAssign->moveBefore(DbgAssign->getIterator());
489 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
490 }
491 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
492 };
493
494 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
495}
496
497namespace {
498
499/// A custom IRBuilder inserter which prefixes all names, but only in
500/// Assert builds.
501class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
502 std::string Prefix;
503
504 Twine getNameWithPrefix(const Twine &Name) const {
505 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
506 }
507
508public:
509 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
510
511 void InsertHelper(Instruction *I, const Twine &Name,
512 BasicBlock::iterator InsertPt) const override {
513 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
514 InsertPt);
515 }
516};
517
518/// Provide a type for IRBuilder that drops names in release builds.
520
521/// A used slice of an alloca.
522///
523/// This structure represents a slice of an alloca used by some instruction. It
524/// stores both the begin and end offsets of this use, a pointer to the use
525/// itself, and a flag indicating whether we can classify the use as splittable
526/// or not when forming partitions of the alloca.
527class Slice {
528 /// The beginning offset of the range.
529 uint64_t BeginOffset = 0;
530
531 /// The ending offset, not included in the range.
532 uint64_t EndOffset = 0;
533
534 /// Storage for both the use of this slice and whether it can be
535 /// split.
536 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
537
538public:
539 Slice() = default;
540
541 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
542 : BeginOffset(BeginOffset), EndOffset(EndOffset),
543 UseAndIsSplittable(U, IsSplittable) {}
544
545 uint64_t beginOffset() const { return BeginOffset; }
546 uint64_t endOffset() const { return EndOffset; }
547
548 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
549 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
550
551 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
552
553 bool isDead() const { return getUse() == nullptr; }
554 void kill() { UseAndIsSplittable.setPointer(nullptr); }
555
556 /// Support for ordering ranges.
557 ///
558 /// This provides an ordering over ranges such that start offsets are
559 /// always increasing, and within equal start offsets, the end offsets are
560 /// decreasing. Thus the spanning range comes first in a cluster with the
561 /// same start position.
562 bool operator<(const Slice &RHS) const {
563 if (beginOffset() < RHS.beginOffset())
564 return true;
565 if (beginOffset() > RHS.beginOffset())
566 return false;
567 if (isSplittable() != RHS.isSplittable())
568 return !isSplittable();
569 if (endOffset() > RHS.endOffset())
570 return true;
571 return false;
572 }
573
574 /// Support comparison with a single offset to allow binary searches.
575 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
576 return LHS.beginOffset() < RHSOffset;
577 }
578 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
579 return LHSOffset < RHS.beginOffset();
580 }
581
582 bool operator==(const Slice &RHS) const {
583 return isSplittable() == RHS.isSplittable() &&
584 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
585 }
586 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
587};
588
589/// Representation of the alloca slices.
590///
591/// This class represents the slices of an alloca which are formed by its
592/// various uses. If a pointer escapes, we can't fully build a representation
593/// for the slices used and we reflect that in this structure. The uses are
594/// stored, sorted by increasing beginning offset and with unsplittable slices
595/// starting at a particular offset before splittable slices.
596class AllocaSlices {
597public:
598 /// Construct the slices of a particular alloca.
599 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
600
601 /// Test whether a pointer to the allocation escapes our analysis.
602 ///
603 /// If this is true, the slices are never fully built and should be
604 /// ignored.
605 bool isEscaped() const { return PointerEscapingInstr; }
606 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
607
608 /// Support for iterating over the slices.
609 /// @{
610 using iterator = SmallVectorImpl<Slice>::iterator;
611 using range = iterator_range<iterator>;
612
613 iterator begin() { return Slices.begin(); }
614 iterator end() { return Slices.end(); }
615
616 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
617 using const_range = iterator_range<const_iterator>;
618
619 const_iterator begin() const { return Slices.begin(); }
620 const_iterator end() const { return Slices.end(); }
621 /// @}
622
623 /// Erase a range of slices.
624 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
625
626 /// Insert new slices for this alloca.
627 ///
628 /// This moves the slices into the alloca's slices collection, and re-sorts
629 /// everything so that the usual ordering properties of the alloca's slices
630 /// hold.
631 void insert(ArrayRef<Slice> NewSlices) {
632 int OldSize = Slices.size();
633 Slices.append(NewSlices.begin(), NewSlices.end());
634 auto SliceI = Slices.begin() + OldSize;
635 std::stable_sort(SliceI, Slices.end());
636 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
637 }
638
639 // Forward declare the iterator and range accessor for walking the
640 // partitions.
641 class partition_iterator;
643
644 /// Access the dead users for this alloca.
645 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
646
647 /// Access Uses that should be dropped if the alloca is promotable.
648 ArrayRef<Use *> getDeadUsesIfPromotable() const {
649 return DeadUseIfPromotable;
650 }
651
652 /// Access the dead operands referring to this alloca.
653 ///
654 /// These are operands which have cannot actually be used to refer to the
655 /// alloca as they are outside its range and the user doesn't correct for
656 /// that. These mostly consist of PHI node inputs and the like which we just
657 /// need to replace with undef.
658 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
659
660#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
661 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
662 void printSlice(raw_ostream &OS, const_iterator I,
663 StringRef Indent = " ") const;
664 void printUse(raw_ostream &OS, const_iterator I,
665 StringRef Indent = " ") const;
666 void print(raw_ostream &OS) const;
667 void dump(const_iterator I) const;
668 void dump() const;
669#endif
670
671private:
672 template <typename DerivedT, typename RetT = void> class BuilderBase;
673 class SliceBuilder;
674
675 friend class AllocaSlices::SliceBuilder;
676
677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
678 /// Handle to alloca instruction to simplify method interfaces.
679 AllocaInst &AI;
680#endif
681
682 /// The instruction responsible for this alloca not having a known set
683 /// of slices.
684 ///
685 /// When an instruction (potentially) escapes the pointer to the alloca, we
686 /// store a pointer to that here and abort trying to form slices of the
687 /// alloca. This will be null if the alloca slices are analyzed successfully.
688 Instruction *PointerEscapingInstr;
689 Instruction *PointerEscapingInstrReadOnly;
690
691 /// The slices of the alloca.
692 ///
693 /// We store a vector of the slices formed by uses of the alloca here. This
694 /// vector is sorted by increasing begin offset, and then the unsplittable
695 /// slices before the splittable ones. See the Slice inner class for more
696 /// details.
698
699 /// Instructions which will become dead if we rewrite the alloca.
700 ///
701 /// Note that these are not separated by slice. This is because we expect an
702 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
703 /// all these instructions can simply be removed and replaced with poison as
704 /// they come from outside of the allocated space.
705 SmallVector<Instruction *, 8> DeadUsers;
706
707 /// Uses which will become dead if can promote the alloca.
708 SmallVector<Use *, 8> DeadUseIfPromotable;
709
710 /// Operands which will become dead if we rewrite the alloca.
711 ///
712 /// These are operands that in their particular use can be replaced with
713 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
714 /// to PHI nodes and the like. They aren't entirely dead (there might be
715 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
716 /// want to swap this particular input for poison to simplify the use lists of
717 /// the alloca.
718 SmallVector<Use *, 8> DeadOperands;
719};
720
721/// A partition of the slices.
722///
723/// An ephemeral representation for a range of slices which can be viewed as
724/// a partition of the alloca. This range represents a span of the alloca's
725/// memory which cannot be split, and provides access to all of the slices
726/// overlapping some part of the partition.
727///
728/// Objects of this type are produced by traversing the alloca's slices, but
729/// are only ephemeral and not persistent.
730class Partition {
731private:
732 friend class AllocaSlices;
733 friend class AllocaSlices::partition_iterator;
734
735 using iterator = AllocaSlices::iterator;
736
737 /// The beginning and ending offsets of the alloca for this
738 /// partition.
739 uint64_t BeginOffset = 0, EndOffset = 0;
740
741 /// The start and end iterators of this partition.
742 iterator SI, SJ;
743
744 /// A collection of split slice tails overlapping the partition.
745 SmallVector<Slice *, 4> SplitTails;
746
747 /// Raw constructor builds an empty partition starting and ending at
748 /// the given iterator.
749 Partition(iterator SI) : SI(SI), SJ(SI) {}
750
751public:
752 /// The start offset of this partition.
753 ///
754 /// All of the contained slices start at or after this offset.
755 uint64_t beginOffset() const { return BeginOffset; }
756
757 /// The end offset of this partition.
758 ///
759 /// All of the contained slices end at or before this offset.
760 uint64_t endOffset() const { return EndOffset; }
761
762 /// The size of the partition.
763 ///
764 /// Note that this can never be zero.
765 uint64_t size() const {
766 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
767 return EndOffset - BeginOffset;
768 }
769
770 /// Test whether this partition contains no slices, and merely spans
771 /// a region occupied by split slices.
772 bool empty() const { return SI == SJ; }
773
774 /// \name Iterate slices that start within the partition.
775 /// These may be splittable or unsplittable. They have a begin offset >= the
776 /// partition begin offset.
777 /// @{
778 // FIXME: We should probably define a "concat_iterator" helper and use that
779 // to stitch together pointee_iterators over the split tails and the
780 // contiguous iterators of the partition. That would give a much nicer
781 // interface here. We could then additionally expose filtered iterators for
782 // split, unsplit, and unsplittable splices based on the usage patterns.
783 iterator begin() const { return SI; }
784 iterator end() const { return SJ; }
785 /// @}
786
787 /// Get the sequence of split slice tails.
788 ///
789 /// These tails are of slices which start before this partition but are
790 /// split and overlap into the partition. We accumulate these while forming
791 /// partitions.
792 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
793};
794
795} // end anonymous namespace
796
797 /// An iterator over partitions of the alloca's slices.
798 ///
799 /// This iterator implements the core algorithm for partitioning the alloca's
800 /// slices. It is a forward iterator as we don't support backtracking for
801 /// efficiency reasons, and re-use a single storage area to maintain the
802 /// current set of split slices.
803 ///
804 /// It operates directly on AllocaSlices::iterator; AllocaSlices is a friend so
805 /// that it can construct these iterators (see AllocaSlices::partitions()).
// NOTE(review): listing line 806 was missing; the class head below is restored
// from the facade template argument and the friend declaration -- confirm.
806 class AllocaSlices::partition_iterator
807 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
808 Partition> {
809 friend class AllocaSlices;
810
811 /// Most of the state for walking the partitions is held in a class
812 /// with a nice interface for examining them.
813 Partition P;
814
815 /// We need to keep the end of the slices to know when to stop.
816 AllocaSlices::iterator SE;
817
818 /// We also need to keep track of the maximum split end offset seen.
819 /// FIXME: Do we really?
820 uint64_t MaxSplitSliceEndOffset = 0;
821
822 /// Sets the partition to be empty at given iterator, and sets the
823 /// end iterator.
824 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
825 : P(SI), SE(SE) {
826 // If not already at the end, advance our state to form the initial
827 // partition.
828 if (SI != SE)
829 advance();
830 }
831
832 /// Advance the iterator to the next partition.
833 ///
834 /// Requires that the iterator not be at the end of the slices.
835 void advance() {
836 assert((P.SI != SE || !P.SplitTails.empty()) &&
837 "Cannot advance past the end of the slices!");
838
839 // Clear out any split uses which have ended.
840 if (!P.SplitTails.empty()) {
841 if (P.EndOffset >= MaxSplitSliceEndOffset) {
842 // If we've finished all splits, this is easy.
843 P.SplitTails.clear();
844 MaxSplitSliceEndOffset = 0;
845 } else {
846 // Remove the uses which have ended in the prior partition. This
847 // cannot change the max split slice end because we just checked that
848 // the prior partition ended prior to that max.
849 llvm::erase_if(P.SplitTails,
850 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
851 assert(llvm::any_of(P.SplitTails,
852 [&](Slice *S) {
853 return S->endOffset() == MaxSplitSliceEndOffset;
854 }) &&
855 "Could not find the current max split slice offset!");
856 assert(llvm::all_of(P.SplitTails,
857 [&](Slice *S) {
858 return S->endOffset() <= MaxSplitSliceEndOffset;
859 }) &&
860 "Max split slice end offset is not actually the max!");
861 }
862 }
863
864 // If P.SI is already at the end, then we've cleared the split tail and
865 // now have an end iterator.
866 if (P.SI == SE) {
867 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
868 return;
869 }
870
871 // If we had a non-empty partition previously, set up the state for
872 // subsequent partitions.
873 if (P.SI != P.SJ) {
874 // Accumulate all the splittable slices which started in the old
875 // partition into the split list.
876 for (Slice &S : P)
877 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
878 P.SplitTails.push_back(&S);
879 MaxSplitSliceEndOffset =
880 std::max(S.endOffset(), MaxSplitSliceEndOffset);
881 }
882
883 // Start from the end of the previous partition.
884 P.SI = P.SJ;
885
886 // If P.SI is now at the end, we at most have a tail of split slices.
887 if (P.SI == SE) {
888 P.BeginOffset = P.EndOffset;
889 P.EndOffset = MaxSplitSliceEndOffset;
890 return;
891 }
892
893 // If we have split slices and the next slice is after a gap and is
894 // not splittable immediately form an empty partition for the split
895 // slices up until the next slice begins.
896 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
897 !P.SI->isSplittable()) {
898 P.BeginOffset = P.EndOffset;
899 P.EndOffset = P.SI->beginOffset();
900 return;
901 }
902 }
903
904 // OK, we need to consume new slices. Set the end offset based on the
905 // current slice, and step SJ past it. The beginning offset of the
906 // partition is the beginning offset of the next slice unless we have
907 // pre-existing split slices that are continuing, in which case we begin
908 // at the prior end offset.
909 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
910 P.EndOffset = P.SI->endOffset();
911 ++P.SJ;
912
913 // There are two strategies to form a partition based on whether the
914 // partition starts with an unsplittable slice or a splittable slice.
915 if (!P.SI->isSplittable()) {
916 // When we're forming an unsplittable region, it must always start at
917 // the first slice and will extend through its end.
918 assert(P.BeginOffset == P.SI->beginOffset());
919
920 // Form a partition including all of the overlapping slices with this
921 // unsplittable slice.
922 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
// Only unsplittable slices may extend the partition; overlapping
// splittable ones are consumed without growing the end offset.
923 if (!P.SJ->isSplittable())
924 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
925 ++P.SJ;
926 }
927
928 // We have a partition across a set of overlapping unsplittable
929 // partitions.
930 return;
931 }
932
933 // If we're starting with a splittable slice, then we need to form
934 // a synthetic partition spanning it and any other overlapping splittable
935 // slices.
936 assert(P.SI->isSplittable() && "Forming a splittable partition!");
937
938 // Collect all of the overlapping splittable slices.
939 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
940 P.SJ->isSplittable()) {
941 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
942 ++P.SJ;
943 }
944
945 // Back up P.EndOffset if we ended the span early when encountering an
946 // unsplittable slice. This synthesizes the early end offset of
947 // a partition spanning only splittable slices.
948 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
949 assert(!P.SJ->isSplittable());
950 P.EndOffset = P.SJ->beginOffset();
951 }
952 }
953
954 public:
/// Compare two iterators over the same slice range (same SE is asserted).
955 bool operator==(const partition_iterator &RHS) const {
956 assert(SE == RHS.SE &&
957 "End iterators don't match between compared partition iterators!");
958
959 // The observed positions of partitions are marked by the P.SI iterator and
960 // the emptiness of the split slices. The latter is only relevant when
961 // P.SI == SE, as the end iterator will additionally have an empty split
962 // slices list, but the prior may have the same P.SI and a tail of split
963 // slices.
964 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
965 assert(P.SJ == RHS.P.SJ &&
966 "Same set of slices formed two different sized partitions!");
967 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
968 "Same slice position with differently sized non-empty split "
969 "slice tails!");
970 return true;
971 }
972 return false;
973 }
974
/// Pre-increment: form the next partition in place via advance().
975 partition_iterator &operator++() {
976 advance();
977 return *this;
978 }
979
/// Access the current partition; the same Partition object is reused and
/// mutated by every advance().
980 Partition &operator*() { return P; }
981 };
982
983 /// A forward range over the partitions of the alloca's slices.
984 ///
985 /// This accesses an iterator range over the partitions of the alloca's
986 /// slices. It computes these partitions on the fly based on the overlapping
987 /// offsets of the slices and the ability to split them. It will visit "empty"
988 /// partitions to cover regions of the alloca only accessed via split
989 /// slices.
///
/// The begin iterator forms its first partition on construction when there
/// is at least one slice; the end iterator is built from (end(), end()) and
/// therefore never advances.
990 iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
991 return make_range(partition_iterator(begin(), end()),
992 partition_iterator(end(), end()));
993 }
994
/// Fold a select whose result can be determined statically.
///
/// \returns the operand the select always produces when its condition is a
/// ConstantInt or when operands 1 and 2 are the same value; nullptr otherwise.
// NOTE(review): listing line 995 (the signature) was missing; restored from
// the body's use of SI and its Value* / nullptr returns -- confirm the name.
995 static Value *foldSelectInst(SelectInst &SI) {
996 // If the condition being selected on is a constant or the same value is
997 // being selected between, fold the select. Yes this does (rarely) happen
998 // early on.
999 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
// A zero condition picks operand 2, any other constant picks operand 1.
1000 return SI.getOperand(1 + CI->isZero());
1001 if (SI.getOperand(1) == SI.getOperand(2))
1002 return SI.getOperand(1);
1003
1004 return nullptr;
1005 }
1006
1007 /// A helper that folds a PHI node or a select.
///
/// \returns the value the instruction folds to, or nullptr if it does not
/// fold.
// NOTE(review): listing lines 1008 (signature) and 1013 (select fallthrough)
// were missing; restored from the call site that passes an Instruction& and
// tests the result as a Value* -- confirm.
1008 static Value *foldPHINodeOrSelectInst(Instruction &I) {
1009 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1010 // If PN merges together the same value, return that value.
1011 return PN->hasConstantValue();
1012 }
1013 return foldSelectInst(cast<SelectInst>(I));
1014 }
1015
1016 /// Builder for the alloca slices.
1017 ///
1018 /// This class builds a set of alloca slices by recursively visiting the uses
1019 /// of an alloca and making a slice for each load and store at each offset.
1020 class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1021 friend class PtrUseVisitor<SliceBuilder>;
1022 friend class InstVisitor<SliceBuilder>;
1023
1024 using Base = PtrUseVisitor<SliceBuilder>;
1025
/// Size in bytes of the alloca being analyzed.
1026 const uint64_t AllocSize;
/// The slice container being populated.
1027 AllocaSlices &AS;
1028
/// Index into AS.Slices of the slice created the first time a mem transfer
/// intrinsic was visited.
1029 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
// NOTE(review): listing line 1030 was missing; restored from the
// `uint64_t &Size = PHIOrSelectSizes[&I]` use further down -- confirm.
/// Cached access size for each PHI/select already analyzed.
1030 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1031
1032 /// Set to de-duplicate dead instructions found in the use walk.
1033 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1034
1035 public:
1036 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
// NOTE(review): listing line 1037 was missing; the base initializer is
// restored from the base class and the otherwise unused DL -- confirm.
1037 : PtrUseVisitor<SliceBuilder>(DL),
1038 AllocSize(AI.getAllocationSize(DL)->getFixedValue()), AS(AS) {}
1039
1040 private:
/// Record \p I as a dead user of the alloca, at most once per instruction.
1041 void markAsDead(Instruction &I) {
1042 if (VisitedDeadInsts.insert(&I).second)
1043 AS.DeadUsers.push_back(&I);
1044 }
1045
/// Record a slice for the current use U covering \p Size bytes at \p Offset,
/// clamped to the end of the allocation. Uses that are empty or start at or
/// past the end of the allocation are marked dead instead.
1046 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1047 bool IsSplittable = false) {
1048 // Completely skip uses which have a zero size or start either before or
1049 // past the end of the allocation.
1050 if (Size == 0 || Offset.uge(AllocSize)) {
1051 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1052 << Offset
1053 << " which has zero size or starts outside of the "
1054 << AllocSize << " byte alloca:\n"
1055 << " alloca: " << AS.AI << "\n"
1056 << " use: " << I << "\n");
1057 return markAsDead(I);
1058 }
1059
1060 uint64_t BeginOffset = Offset.getZExtValue();
1061 uint64_t EndOffset = BeginOffset + Size;
1062
1063 // Clamp the end offset to the end of the allocation. Note that this is
1064 // formulated to handle even the case where "BeginOffset + Size" overflows.
1065 // This may appear superficially to be something we could ignore entirely,
1066 // but that is not so! There may be widened loads or PHI-node uses where
1067 // some instructions are dead but not others. We can't completely ignore
1068 // them, and so have to record at least the information here.
1069 assert(AllocSize >= BeginOffset); // Established above.
1070 if (Size > AllocSize - BeginOffset) {
1071 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1072 << Offset << " to remain within the " << AllocSize
1073 << " byte alloca:\n"
1074 << " alloca: " << AS.AI << "\n"
1075 << " use: " << I << "\n");
1076 EndOffset = AllocSize;
1077 }
1078
1079 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1080 }
1081
/// A bitcast with no users is dead; otherwise defer to the base visitor.
1082 void visitBitCastInst(BitCastInst &BC) {
1083 if (BC.use_empty())
1084 return markAsDead(BC);
1085
1086 return Base::visitBitCastInst(BC);
1087 }
1088
/// An addrspacecast with no users is dead; otherwise defer to the base
/// visitor.
1089 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1090 if (ASC.use_empty())
1091 return markAsDead(ASC);
1092
1093 return Base::visitAddrSpaceCastInst(ASC);
1094 }
1095
/// A GEP with no users is dead; otherwise defer to the base visitor.
1096 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1097 if (GEPI.use_empty())
1098 return markAsDead(GEPI);
1099
1100 return Base::visitGetElementPtrInst(GEPI);
1101 }
1102
/// Shared tail of load and store handling: decide splittability from the
/// accessed type and record the use.
1103 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1104 uint64_t Size, bool IsVolatile) {
1105 // We allow splitting of non-volatile loads and stores where the type is an
1106 // integer type. These may be used to implement 'memcpy' or other "transfer
1107 // of bits" patterns.
1108 bool IsSplittable =
1109 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1110
1111 insertUse(I, Offset, Size, IsSplittable);
1112 }
1113
1114 void visitLoadInst(LoadInst &LI) {
1115 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1116 "All simple FCA loads should have been pre-split");
1117
1118 // If there is a load with an unknown offset, we can still perform store
1119 // to load forwarding for other known-offset loads.
1120 if (!IsOffsetKnown)
1121 return PI.setEscapedReadOnly(&LI);
1122
1123 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1124 if (Size.isScalable()) {
// A scalable size is only usable when the function yields a non-zero
// vscale value; otherwise give up on this alloca.
1125 unsigned VScale = LI.getFunction()->getVScaleValue();
1126 if (!VScale)
1127 return PI.setAborted(&LI);
1128
1129 Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1130 }
1131
1132 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1133 LI.isVolatile());
1134 }
1135
1136 void visitStoreInst(StoreInst &SI) {
1137 Value *ValOp = SI.getValueOperand();
// Storing the pointer itself (rather than storing through it) escapes it.
1138 if (ValOp == *U)
1139 return PI.setEscapedAndAborted(&SI);
1140 if (!IsOffsetKnown)
1141 return PI.setAborted(&SI);
1142
1143 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1144 if (StoreSize.isScalable()) {
1145 unsigned VScale = SI.getFunction()->getVScaleValue();
1146 if (!VScale)
1147 return PI.setAborted(&SI);
1148
1149 StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1150 }
1151
1152 uint64_t Size = StoreSize.getFixedValue();
1153
1154 // If this memory access can be shown to *statically* extend outside the
1155 // bounds of the allocation, its behavior is undefined, so simply
1156 // ignore it. Note that this is more strict than the generic clamping
1157 // behavior of insertUse. We also try to handle cases which might run the
1158 // risk of overflow.
1159 // FIXME: We should instead consider the pointer to have escaped if this
1160 // function is being instrumented for addressing bugs or race conditions.
1161 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1162 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1163 << Offset << " which extends past the end of the "
1164 << AllocSize << " byte alloca:\n"
1165 << " alloca: " << AS.AI << "\n"
1166 << " use: " << SI << "\n");
1167 return markAsDead(SI);
1168 }
1169
1170 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1171 "All simple FCA stores should have been pre-split");
1172 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1173 }
1174
1175 void visitMemSetInst(MemSetInst &II) {
1176 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1177 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1178 if ((Length && Length->getValue() == 0) ||
1179 (IsOffsetKnown && Offset.uge(AllocSize)))
1180 // Zero-length memsets, and memsets that start at or past the end of the
// allocation, can be ignored entirely.
1181 return markAsDead(II);
1182
1183 if (!IsOffsetKnown)
1184 return PI.setAborted(&II);
1185
// With a non-constant length, conservatively cover the rest of the
// allocation and make the slice unsplittable.
1186 insertUse(II, Offset,
1187 Length ? Length->getLimitedValue()
1188 : AllocSize - Offset.getLimitedValue(),
1189 (bool)Length);
1190 }
1191
1192 void visitMemTransferInst(MemTransferInst &II) {
1193 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1194 if (Length && Length->getValue() == 0)
1195 // Zero-length mem transfer intrinsics can be ignored entirely.
1196 return markAsDead(II);
1197
1198 // Because we can visit these intrinsics twice, also check to see if the
1199 // first time marked this instruction as dead. If so, skip it.
1200 if (VisitedDeadInsts.count(&II))
1201 return;
1202
1203 if (!IsOffsetKnown)
1204 return PI.setAborted(&II);
1205
1206 // This side of the transfer is completely out-of-bounds, and so we can
1207 // nuke the entire transfer. However, we also need to nuke the other side
1208 // if already added to our partitions.
1209 // FIXME: Yet another place we really should bypass this when
1210 // instrumenting for ASan.
1211 if (Offset.uge(AllocSize)) {
1212 auto MTPI = MemTransferSliceMap.find(&II);
1213 if (MTPI != MemTransferSliceMap.end())
1214 AS.Slices[MTPI->second].kill();
1215 return markAsDead(II);
1216 }
1217
1218 uint64_t RawOffset = Offset.getLimitedValue();
1219 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1220
1221 // Check for the special case where the same exact value is used for both
1222 // source and dest.
1223 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1224 // For non-volatile transfers this is a no-op.
1225 if (!II.isVolatile())
1226 return markAsDead(II);
1227
1228 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1229 }
1230
1231 // If we have seen both source and destination for a mem transfer, then
1232 // they both point to the same alloca.
1233 bool Inserted;
1234 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
// On first visit this maps II to the index the new slice is about to get;
// on the second visit it yields the index recorded the first time.
1235 std::tie(MTPI, Inserted) =
1236 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1237 unsigned PrevIdx = MTPI->second;
1238 if (!Inserted) {
1239 Slice &PrevP = AS.Slices[PrevIdx];
1240
1241 // Check if the begin offsets match and this is a non-volatile transfer.
1242 // In that case, we can completely elide the transfer.
1243 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1244 PrevP.kill();
1245 return markAsDead(II);
1246 }
1247
1248 // Otherwise we have an offset transfer within the same alloca. We can't
1249 // split those.
1250 PrevP.makeUnsplittable();
1251 }
1252
1253 // Insert the use now that we've fixed up the splittable nature.
1254 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1255
1256 // Check that we ended up with a valid index in the map.
1257 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1258 "Map index doesn't point back to a slice with this user.");
1259 }
1260
1261 // Disable SRoA for any intrinsics except for lifetime invariants.
1262 // FIXME: What about debug intrinsics? This matches old behavior, but
1263 // doesn't make sense.
1264 void visitIntrinsicInst(IntrinsicInst &II) {
// Droppable uses do not block promotion; remember them separately.
1265 if (II.isDroppable()) {
1266 AS.DeadUseIfPromotable.push_back(U);
1267 return;
1268 }
1269
1270 if (!IsOffsetKnown)
1271 return PI.setAborted(&II);
1272
1273 if (II.isLifetimeStartOrEnd()) {
1274 insertUse(II, Offset, AllocSize, true);
1275 return;
1276 }
1277
1278 Base::visitIntrinsicInst(II);
1279 }
1280
/// Walk the transitive users of the PHI/select \p Root looking for a use
/// that prevents slicing.
///
/// \param [out] Size the largest fixed store size among the loads and stores
/// reached, or zero if none were found.
/// \returns the offending instruction, or nullptr if none was found. A
/// scalable load/store size also returns nullptr, after marking the walk
/// aborted via PI.setAborted.
1281 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1282 // We consider any PHI or select that results in a direct load or store of
1283 // the same offset to be a viable use for slicing purposes. These uses
1284 // are considered unsplittable and the size is the maximum loaded or stored
1285 // size.
1286 SmallPtrSet<Instruction *, 4> Visited;
// NOTE(review): listing line 1287 was missing; the worklist type is restored
// from the (used instruction, user) pairs pushed and popped below -- confirm.
1287 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1288 Visited.insert(Root);
1289 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1290 const DataLayout &DL = Root->getDataLayout();
1291 // If there are no loads or stores, the access is dead. We mark that as
1292 // a size zero access.
1293 Size = 0;
1294 do {
1295 Instruction *I, *UsedI;
1296 std::tie(UsedI, I) = Uses.pop_back_val();
1297
1298 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1299 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1300 if (LoadSize.isScalable()) {
1301 PI.setAborted(LI);
1302 return nullptr;
1303 }
1304 Size = std::max(Size, LoadSize.getFixedValue());
1305 continue;
1306 }
1307 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1308 Value *Op = SI->getOperand(0);
// Storing the pointer value itself is an unsafe use.
1309 if (Op == UsedI)
1310 return SI;
1311 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1312 if (StoreSize.isScalable()) {
1313 PI.setAborted(SI);
1314 return nullptr;
1315 }
1316 Size = std::max(Size, StoreSize.getFixedValue());
1317 continue;
1318 }
1319
1320 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1321 if (!GEP->hasAllZeroIndices())
1322 return GEP;
1323 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
// NOTE(review): listing line 1324 was missing; the remaining instruction
// kinds in this condition are restored from memory of upstream -- confirm.
1324 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1325 return I;
1326 }
1327
// Note: this loop variable shadows the visitor's current-use member U.
1328 for (User *U : I->users())
1329 if (Visited.insert(cast<Instruction>(U)).second)
1330 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1331 } while (!Uses.empty());
1332
1333 return nullptr;
1334 }
1335
1336 void visitPHINodeOrSelectInst(Instruction &I) {
// NOTE(review): listing line 1337 was missing; the assert is restored from
// the two callers visitPHINode / visitSelectInst -- confirm.
1337 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1338 if (I.use_empty())
1339 return markAsDead(I);
1340
1341 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1342 // instructions in this BB, which may be required during rewriting. Bail out
1343 // on these cases.
1344 if (isa<PHINode>(I) && !I.getParent()->hasInsertionPt())
1345 return PI.setAborted(&I);
1346
1347 // TODO: We could use simplifyInstruction here to fold PHINodes and
1348 // SelectInsts. However, doing so requires to change the current
1349 // dead-operand-tracking mechanism. For instance, suppose neither loading
1350 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1351 // trap either. However, if we simply replace %U with undef using the
1352 // current dead-operand-tracking mechanism, "load (select undef, undef,
1353 // %other)" may trap because the select may return the first operand
1354 // "undef".
1355 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1356 if (Result == *U)
1357 // If the result of the constant fold will be the pointer, recurse
1358 // through the PHI/select as if we had RAUW'ed it.
1359 enqueueUsers(I);
1360 else
1361 // Otherwise the operand to the PHI/select is dead, and we can replace
1362 // it with poison.
1363 AS.DeadOperands.push_back(U);
1364
1365 return;
1366 }
1367
1368 if (!IsOffsetKnown)
1369 return PI.setAborted(&I);
1370
1371 // See if we already have computed info on this node.
1372 uint64_t &Size = PHIOrSelectSizes[&I];
1373 if (!Size) {
1374 // This is a new PHI/Select, check for an unsafe use of it.
1375 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1376 return PI.setAborted(UnsafeI);
1377 }
1378
1379 // For PHI and select operands outside the alloca, we can't nuke the entire
1380 // phi or select -- the other side might still be relevant, so we special
1381 // case them here and use a separate structure to track the operands
1382 // themselves which should be replaced with poison.
1383 // FIXME: This should instead be escaped in the event we're instrumenting
1384 // for address sanitization.
1385 if (Offset.uge(AllocSize)) {
1386 AS.DeadOperands.push_back(U);
1387 return;
1388 }
1389
1390 insertUse(I, Offset, Size);
1391 }
1392
1393 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1394
1395 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1396
1397 /// Disable SROA entirely if there are unhandled users of the alloca.
1398 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1399
1400 void visitCallBase(CallBase &CB) {
1401 // If the call operand is read-only and only does a read-only or address
1402 // capture, then we mark it as EscapedReadOnly.
1403 if (CB.isDataOperand(U) &&
1404 !capturesFullProvenance(CB.getCaptureInfo(U->getOperandNo())) &&
1405 CB.onlyReadsMemory(U->getOperandNo())) {
1406 PI.setEscapedReadOnly(&CB);
1407 return;
1408 }
1409
1410 Base::visitCallBase(CB);
1411 }
1412 };
1413
/// Build the slices of \p AI by running a SliceBuilder over its uses.
///
/// If the walk reports an escape or an abort, the responsible instruction is
/// stored in PointerEscapingInstr and construction returns early, without
/// pruning or sorting the slices. Otherwise dead slices are removed and the
/// remaining ones are stably sorted.
1414 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1415 :
// The alloca reference is only kept when the printing code is compiled in.
1416 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 AI(AI),
1418 #endif
1419 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1420 SliceBuilder PB(DL, AI, *this);
1421 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1422 if (PtrI.isEscaped() || PtrI.isAborted()) {
1423 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1424 // possibly by just storing the PtrInfo in the AllocaSlices.
// Prefer the escaping instruction; fall back to the aborting one.
1425 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1426 : PtrI.getAbortingInst();
1427 assert(PointerEscapingInstr && "Did not track a bad instruction");
1428 return;
1429 }
1430 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1431
// Drop slices that were killed during the walk before sorting.
1432 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1433
1434 // Sort the uses. This arranges for the offsets to be in ascending order,
1435 // and the sizes to be in descending order.
1436 llvm::stable_sort(Slices);
1437 }
1438
1439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1440
/// Print the slice at \p I followed by a newline, then its user, each
/// prefixed with \p Indent.
1441 void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1442 StringRef Indent) const {
1443 printSlice(OS, I, Indent);
1444 OS << "\n";
1445 printUse(OS, I, Indent);
1446 }
1447
/// Print the half-open byte range of the slice at \p I, its index relative
/// to begin(), and a "(splittable)" marker when applicable. No trailing
/// newline is written.
1448 void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1449 StringRef Indent) const {
1450 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1451 << " slice #" << (I - begin())
1452 << (I->isSplittable() ? " (splittable)" : "");
1453 }
1454
/// Print the user instruction of the slice at \p I on its own line.
1455 void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1456 StringRef Indent) const {
1457 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1458 }
1459
/// Print every slice of the alloca, or, if the pointer escaped, only the
/// instruction that prevented the analysis.
1460 void AllocaSlices::print(raw_ostream &OS) const {
1461 if (PointerEscapingInstr) {
1462 OS << "Can't analyze slices for alloca: " << AI << "\n"
1463 << " A pointer to this alloca escaped by:\n"
1464 << " " << *PointerEscapingInstr << "\n";
1465 return;
1466 }
1467
// A read-only escape is reported but does not suppress the slice listing.
1468 if (PointerEscapingInstrReadOnly)
1469 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1470
1471 OS << "Slices of alloca: " << AI << "\n";
1472 for (const_iterator I = begin(), E = end(); I != E; ++I)
1473 print(OS, I);
1474 }
1475
/// Debugging helper: print the single slice at \p I to dbgs().
1476 LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1477 print(dbgs(), I);
1478 }
/// Debugging helper: print all slices to dbgs().
1479 LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1480
1481#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1482
1483 /// Walk the range of a partitioning looking for a common type to cover this
1484 /// sequence of slices.
///
/// Only slices that span exactly [B->beginOffset(), EndOffset) and whose user
/// is not an intrinsic are considered.
///
/// \returns a pair: first, the single type shared by all considered users
/// (nullptr if they disagree, if a considered user is neither a load nor a
/// store, or if no user was considered); second, the widest byte-multiple
/// integer type seen that fits in the range (nullptr if none).
1485 static std::pair<Type *, IntegerType *>
1486 findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1487 uint64_t EndOffset) {
1488 Type *Ty = nullptr;
1489 bool TyIsCommon = true;
1490 IntegerType *ITy = nullptr;
1491
1492 // Note that we need to look at *every* alloca slice's Use to ensure we
1493 // always get consistent results regardless of the order of slices.
1494 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1495 Use *U = I->getUse();
1496 if (isa<IntrinsicInst>(*U->getUser()))
1497 continue;
1498 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1499 continue;
1500
// UserTy stays null for users other than loads and stores.
1501 Type *UserTy = nullptr;
1502 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1503 UserTy = LI->getType();
1504 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1505 UserTy = SI->getValueOperand()->getType();
1506 }
1507
1508 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1509 // If the type is larger than the partition, skip it. We only encounter
1510 // this for split integer operations where we want to use the type of the
1511 // entity causing the split. Also skip if the type is not a byte width
1512 // multiple.
1513 if (UserITy->getBitWidth() % 8 != 0 ||
1514 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1515 continue;
1516
1517 // Track the largest bitwidth integer type used in this way in case there
1518 // is no common type.
1519 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1520 ITy = UserITy;
1521 }
1522
1523 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1524 // depend on types skipped above.
1525 if (!UserTy || (Ty && Ty != UserTy))
1526 TyIsCommon = false; // Give up on anything but an iN type.
1527 else
1528 Ty = UserTy;
1529 }
1530
1531 return {TyIsCommon ? Ty : nullptr, ITy};
1532 }
1533
1534 /// PHI instructions that use an alloca and are subsequently loaded can be
1535 /// rewritten to load both input pointers in the pred blocks and then PHI the
1536 /// results, allowing the load of the alloca to be promoted.
1537 /// From this:
1538 /// %P2 = phi [i32* %Alloca, i32* %Other]
1539 /// %V = load i32* %P2
1540 /// to:
1541 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1542 /// ...
1543 /// %V2 = load i32* %Other
1544 /// ...
1545 /// %V = phi [i32 %V1, i32 %V2]
1546 ///
1547 /// We can do this to a select if its only uses are loads and if the operands
1548 /// to the select can be loaded unconditionally.
1549 ///
1550 /// FIXME: This should be hoisted into a generic utility, likely in
1551 /// Transforms/Util/Local.h
///
/// \returns true only if every user of \p PN is a simple load of one common
/// type in PN's own block with no possibly-writing instruction in between,
/// and a load can be placed in every predecessor.
// NOTE(review): listing line 1552 (the signature) was missing; restored from
// the body's use of PN as a PHINode and its bool returns -- confirm.
1552 static bool isSafePHIToSpeculate(PHINode &PN) {
1553 const DataLayout &DL = PN.getDataLayout();
1554
1555 // For now, we can only do this promotion if the load is in the same block
1556 // as the PHI, and if there are no stores between the phi and load.
1557 // TODO: Allow recursive phi users.
1558 // TODO: Allow stores.
1559 BasicBlock *BB = PN.getParent();
1560 Align MaxAlign;
1561 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1562 Type *LoadType = nullptr;
1563 for (User *U : PN.users()) {
// NOTE(review): listing line 1564 was missing; restored from the null
// check and LoadInst member calls on LI that follow -- confirm.
1564 LoadInst *LI = dyn_cast<LoadInst>(U);
1565 if (!LI || !LI->isSimple())
1566 return false;
1567
1568 // For now we only allow loads in the same block as the PHI. This is
1569 // a common case that happens when instcombine merges two loads through
1570 // a PHI.
1571 if (LI->getParent() != BB)
1572 return false;
1573
// All loads of the PHI must agree on a single loaded type.
1574 if (LoadType) {
1575 if (LoadType != LI->getType())
1576 return false;
1577 } else {
1578 LoadType = LI->getType();
1579 }
1580
1581 // Ensure that there are no instructions between the PHI and the load that
1582 // could store.
1583 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1584 if (BBI->mayWriteToMemory())
1585 return false;
1586
1587 MaxAlign = std::max(MaxAlign, LI->getAlign());
1588 }
1589
// No users at all: nothing to speculate.
1590 if (!LoadType)
1591 return false;
1592
1593 APInt LoadSize =
1594 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1595
1596 // We can only transform this if it is safe to push the loads into the
1597 // predecessor blocks. The only thing to watch out for is that we can't put
1598 // a possibly trapping load in the predecessor if it is a critical edge.
1599 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
// NOTE(review): listing line 1600 was missing; TI is restored as the
// terminator of the incoming block, per the comments below -- confirm.
1600 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1601 Value *InVal = PN.getIncomingValue(Idx);
1602
1603 // If the value is produced by the terminator of the predecessor (an
1604 // invoke) or it has side-effects, there is no valid place to put a load
1605 // in the predecessor.
1606 if (TI == InVal || TI->mayHaveSideEffects())
1607 return false;
1608
1609 // If the predecessor has a single successor, then the edge isn't
1610 // critical.
1611 if (TI->getNumSuccessors() == 1)
1612 continue;
1613
1614 // If this pointer is always safe to load, or if we can prove that there
1615 // is already a load in the block, then we can move the load to the pred
1616 // block.
1617 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1618 continue;
1619
1620 return false;
1621 }
1622
1623 return true;
1624 }
1625
/// Replace every load of \p PN with a new PHI of loads placed at the end of
/// each predecessor block, then erase \p PN.
///
/// Every user of \p PN must be a LoadInst (enforced by cast<>); presumably
/// callers have already checked this with isSafePHIToSpeculate -- confirm.
1626 static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1627 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1628
1629 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1630 Type *LoadTy = SomeLoad->getType();
1631 IRB.SetInsertPoint(&PN);
1632 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1633 PN.getName() + ".sroa.speculated");
1634
1635 // Get the AA tags and alignment to use from one of the loads. It does not
1636 // matter which one we get and if any differ.
1637 AAMDNodes AATags = SomeLoad->getAAMetadata();
1638 Align Alignment = SomeLoad->getAlign();
1639
1640 // Rewrite all loads of the PN to use the new PHI.
1641 while (!PN.use_empty()) {
1642 LoadInst *LI = cast<LoadInst>(PN.user_back());
1643 LI->replaceAllUsesWith(NewPN);
1644 LI->eraseFromParent();
1645 }
1646
1647 // Inject loads into all of the pred blocks.
1648 DenseMap<BasicBlock *, Value *> InjectedLoads;
1649 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1650 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1651 Value *InVal = PN.getIncomingValue(Idx);
1652
1653 // A PHI node is allowed to have multiple (duplicated) entries for the same
1654 // basic block, as long as the value is the same. So if we already injected
1655 // a load in the predecessor, then we should reuse the same load for all
1656 // duplicated entries.
1657 if (Value *V = InjectedLoads.lookup(Pred)) {
1658 NewPN->addIncoming(V, Pred);
1659 continue;
1660 }
1661
// The speculated load goes immediately before the predecessor's terminator.
1662 Instruction *TI = Pred->getTerminator();
1663 IRB.SetInsertPoint(TI);
1664
1665 LoadInst *Load = IRB.CreateAlignedLoad(
1666 LoadTy, InVal, Alignment,
1667 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1668 ++NumLoadsSpeculated;
1669 if (AATags)
1670 Load->setAAMetadata(AATags);
1671 NewPN->addIncoming(Load, Pred);
1672 InjectedLoads[Pred] = Load;
1673 }
1674
1675 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1676 PN.eraseFromParent();
1677 }
1678
/// Mark one hand of the select as speculatable.
///
/// \param isTrueVal true to mark the true-value hand, false for the
/// false-value hand.
/// \returns *this, so calls can be chained.
// NOTE(review): listing lines 1682 and 1684 (both branch bodies) were missing;
// restored as the setter counterparts of the Bitfield::get calls used by
// isSpeculatable() -- confirm.
1679 SelectHandSpeculativity &
1680 SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1681 if (isTrueVal)
1682 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1683 else
1684 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1685 return *this;
1686 }
1687
/// Query whether one hand of the select is marked speculatable: the
/// true-value hand when \p isTrueVal is set, the false-value hand otherwise.
1688 bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1689 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1690 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1691 }
1692
/// \returns true if both the true and the false hand are speculatable.
1693 bool SelectHandSpeculativity::areAllSpeculatable() const {
1694 return isSpeculatable(/*isTrueVal=*/true) &&
1695 isSpeculatable(/*isTrueVal=*/false);
1696 }
1697
1698bool SelectHandSpeculativity::areAnySpeculatable() const {
1699 return isSpeculatable(/*isTrueVal=*/true) ||
1700 isSpeculatable(/*isTrueVal=*/false);
1701}
1702bool SelectHandSpeculativity::areNoneSpeculatable() const {
1703 return !areAnySpeculatable();
1704}
1705
/// Work out which hands of the select \c SI could be loaded from in place of
/// the simple load \c LI, and return the answer as a SelectHandSpeculativity.
/// Each hand that passes the check is marked via setAsSpeculatable().
// NOTE(review): listing line 1707 (function name and parameter list) and
// listing line 1713 (the start of the `if` condition that ends in `&LI))`)
// are absent from this extract. The call site in isSafeSelectToSpeculate
// suggests the signature is
// isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG);
// confirm against the complete file.
1706static SelectHandSpeculativity
1708 assert(LI.isSimple() && "Only for simple loads");
1709 SelectHandSpeculativity Spec;
1710
1711 const DataLayout &DL = SI.getDataLayout();
 // Visit the true hand first, then the false hand.
1712 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1714 &LI))
1715 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
 // With PreserveCFG set, the caller gives up unless both hands are
 // speculatable, so the first failing hand already decides the outcome.
1716 else if (PreserveCFG)
1717 return Spec;
1718
1719 return Spec;
1720}
1721
1722std::optional<RewriteableMemOps>
1723SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1724 RewriteableMemOps Ops;
1725
1726 for (User *U : SI.users()) {
1727 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1728 U = *BC->user_begin();
1729
1730 if (auto *Store = dyn_cast<StoreInst>(U)) {
1731 // Note that atomic stores can be transformed; atomic semantics do not
1732 // have any meaning for a local alloca. Stores are not speculatable,
1733 // however, so if we can't turn it into a predicated store, we are done.
1734 if (Store->isVolatile() || PreserveCFG)
1735 return {}; // Give up on this `select`.
1736 Ops.emplace_back(Store);
1737 continue;
1738 }
1739
1740 auto *LI = dyn_cast<LoadInst>(U);
1741
1742 // Note that atomic loads can be transformed;
1743 // atomic semantics do not have any meaning for a local alloca.
1744 if (!LI || LI->isVolatile())
1745 return {}; // Give up on this `select`.
1746
1747 PossiblySpeculatableLoad Load(LI);
1748 if (!LI->isSimple()) {
1749 // If the `load` is not simple, we can't speculatively execute it,
1750 // but we could handle this via a CFG modification. But can we?
1751 if (PreserveCFG)
1752 return {}; // Give up on this `select`.
1753 Ops.emplace_back(Load);
1754 continue;
1755 }
1756
1757 SelectHandSpeculativity Spec =
1758 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1759 if (PreserveCFG && !Spec.areAllSpeculatable())
1760 return {}; // Give up on this `select`.
1761
1762 Load.setInt(Spec);
1763 Ops.emplace_back(Load);
1764 }
1765
1766 return Ops;
1767}
1768
// Replaces a simple load \c LI of the select \c SI with two loads, one from
// each hand of the select, and a new select between the two loaded values.
// All uses of \c LI are redirected to the new select; \c LI itself is not
// erased here.
// NOTE(review): listing line 1769 (return type, function name and the leading
// parameters, which evidently include `SI` and `LI`) is absent from this
// extract; confirm the signature against the complete file.
1770 IRBuilderTy &IRB) {
1771 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1772
1773 Value *TV = SI.getTrueValue();
1774 Value *FV = SI.getFalseValue();
1775 // Replace the given load of the select with a select of two loads.
1776
1777 assert(LI.isSimple() && "We only speculate simple loads");
1778
 // Both new loads and the new select are emitted immediately before LI.
1779 IRB.SetInsertPoint(&LI);
1780
1781 LoadInst *TL =
1782 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1783 LI.getName() + ".sroa.speculate.load.true");
1784 LoadInst *FL =
1785 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1786 LI.getName() + ".sroa.speculate.load.false");
1787 NumLoadsSpeculated += 2;
1788
1789 // Transfer alignment and AA info if present.
 // NOTE(review): the loads were already created with LI.getAlign() above, so
 // these two calls look redundant; confirm before removing.
1790 TL->setAlignment(LI.getAlign());
1791 FL->setAlignment(LI.getAlign());
1792
1793 AAMDNodes Tags = LI.getAAMetadata();
1794 if (Tags) {
1795 TL->setAAMetadata(Tags);
1796 FL->setAAMetadata(Tags);
1797 }
1798
 // The original select is passed as the last argument unless
 // ProfcheckDisableMetadataFixes is set; presumably this is the source the
 // new select copies its metadata from - confirm against IRBuilder.
1799 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1800 LI.getName() + ".sroa.speculated",
1801 ProfcheckDisableMetadataFixes ? nullptr : &SI);
1802
1803 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1804 LI.replaceAllUsesWith(V);
1805}
1806
// Rewrites a load or store \c I whose pointer operand is the select \c SI by
// splitting the block at \c I and placing a clone of \c I on each side of a
// branch on the select condition, with each clone addressing one hand of the
// select. For loads, the clones are merged with a PHI that replaces all uses
// of \c I. \c I itself is not erased here.
// NOTE(review): listing line 1808 (return type, function name and the leading
// parameters, which evidently include `SI` and `I`) is absent from this
// extract. The dispatcher below calls this as
// rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU); confirm the signature.
1807template <typename T>
1809 SelectHandSpeculativity Spec,
1810 DomTreeUpdater &DTU) {
1811 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1812 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1813 BasicBlock *Head = I.getParent();
1814 Instruction *ThenTerm = nullptr;
1815 Instruction *ElseTerm = nullptr;
 // No hand is speculatable: both hands get their own guarded block.
 // Otherwise only one guarded block is created.
1816 if (Spec.areNoneSpeculatable())
1817 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1818 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1819 else {
1820 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1821 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1822 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1823 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1824 cast<CondBrInst>(Head->getTerminator())->swapSuccessors();
1825 }
1826 auto *HeadBI = cast<CondBrInst>(Head->getTerminator());
1827 Spec = {}; // Do not use `Spec` beyond this point.
 // After the split, I lives in the continuation block.
1828 BasicBlock *Tail = I.getParent();
1829 Tail->setName(Head->getName() + ".cont");
 // PN is assigned, and later used, only on the paths where I is a load.
1830 PHINode *PN;
1831 if (isa<LoadInst>(I))
1832 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1833 for (BasicBlock *SuccBB : successors(Head)) {
1834 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1835 int SuccIdx = IsThen ? 0 : 1;
 // A successor equal to Tail means this hand has no guarded block of its
 // own, so its clone is placed in Head and executes unconditionally.
1836 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1837 auto &CondMemOp = cast<T>(*I.clone());
1838 if (NewMemOpBB != Head) {
1839 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1840 if (isa<LoadInst>(I))
1841 ++NumLoadsPredicated;
1842 else
1843 ++NumStoresPredicated;
1844 } else {
1845 CondMemOp.dropUBImplyingAttrsAndMetadata();
1846 ++NumLoadsSpeculated;
1847 }
1848 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
 // Select operands 1 and 2 are its true and false values (operand 0 is the
 // condition), so SuccIdx picks the hand that matches this successor.
1849 Value *Ptr = SI.getOperand(1 + SuccIdx);
1850 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1851 if (isa<LoadInst>(I)) {
1852 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1853 PN->addIncoming(&CondMemOp, NewMemOpBB);
1854 } else
1855 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1856 }
1857 if (isa<LoadInst>(I)) {
1858 PN->takeName(&I);
1859 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1860 I.replaceAllUsesWith(PN);
1861 }
1862}
1863
// Type-dispatching front end: forwards \c I to the templated
// rewriteMemOpOfSelect as a LoadInst or as a StoreInst, and treats any other
// instruction kind as unreachable.
// NOTE(review): listing line 1864 (return type, function name and the leading
// parameters, which evidently include `SelInst` and `I`) is absent from this
// extract; confirm the signature against the complete file.
1865 SelectHandSpeculativity Spec,
1866 DomTreeUpdater &DTU) {
1867 if (auto *LI = dyn_cast<LoadInst>(&I))
1868 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1869 else if (auto *SI = dyn_cast<StoreInst>(&I))
1870 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1871 else
1872 llvm_unreachable_internal("Only for load and store.");
1873}
1874
// Rewrites every memory operation in \c Ops that uses the select \c SI, then
// erases the rewritten operations, any remaining (bitcast) users of the
// select, and the select itself. Returns true when at least one operation
// went through rewriteMemOpOfSelect, i.e. when the CFG was changed.
// NOTE(review): listing line 1875 (return type, function name and the leading
// parameter, evidently the select `SI`) and listing line 1892 (the statement
// executed when all hands are speculatable) are absent from this extract;
// confirm both against the complete file.
1876 const RewriteableMemOps &Ops,
1877 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1878 bool CFGChanged = false;
1879 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1880
1881 for (const RewriteableMemOp &Op : Ops) {
 // Spec stays default-constructed for stores; only loads carry a value.
1882 SelectHandSpeculativity Spec;
1883 Instruction *I;
1884 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1885 I = *US;
1886 } else {
1887 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1888 I = PSL.getPointer();
1889 Spec = PSL.getInt();
1890 }
1891 if (Spec.areAllSpeculatable()) {
1893 } else {
1894 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1895 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1896 CFGChanged = true;
1897 }
1898 I->eraseFromParent();
1899 }
1900
 // Every remaining user is cast to BitCastInst, so anything else asserts.
1901 for (User *U : make_early_inc_range(SI.users()))
1902 cast<BitCastInst>(U)->eraseFromParent();
1903 SI.eraseFromParent();
1904 return CFGChanged;
1905}
1906
1907/// Compute an adjusted pointer from Ptr by Offset bytes where the
1908/// resulting pointer has PointerTy.
///
/// A pointer-add is emitted only for a non-zero \c Offset (named
/// `<NamePrefix>sroa_idx`); the result is then cast to \c PointerTy with a
/// bitcast or address-space cast (named `<NamePrefix>sroa_cast`).
// NOTE(review): listing line 1910 (the parameters between `Ptr` and
// `NamePrefix`, which evidently declare `Offset` and `PointerTy`) is absent
// from this extract; confirm their types against the complete file.
1909static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1911 const Twine &NamePrefix) {
1912 if (Offset != 0)
1913 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1914 NamePrefix + "sroa_idx");
1915 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1916 NamePrefix + "sroa_cast");
1917}
1918
1919/// Compute the adjusted alignment for a load or store from an offset.
1923
1924/// Test whether we can convert a value from the old to the new type.
1925///
1926/// This predicate should be used to guard calls to convertValue in order to
1927/// ensure that we only try to convert viable values. The strategy is that we
1928/// will peel off single element struct and array wrappings to get to an
1929/// underlying value, and convert that value.
///
/// \p VScale defaults to 0, which means the scalable-vector size is unknown;
/// in that case any conversion between a fixed and a scalable vector is
/// rejected.
// NOTE(review): listing lines 1939 (the start of the assert inside the
// integer/integer case), 1961 and 1966 (the conditions guarding the two
// `return false;` statements in the fixed/scalable case) are absent from this
// extract; confirm against the complete file.
1930static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1931 unsigned VScale = 0) {
1932 if (OldTy == NewTy)
1933 return true;
1934
1935 // For integer types, we can't handle any bit-width differences. This would
1936 // break both vector conversions with extension and introduce endianness
1937 // issues when in conjunction with loads and stores.
1938 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1940 cast<IntegerType>(NewTy)->getBitWidth() &&
1941 "We can't have the same bitwidth for different int types");
1942 return false;
1943 }
1944
1945 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1946 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1947
1948 if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
1949 (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
1950 // Conversion is only possible when the size of scalable vectors is known.
1951 if (!VScale)
1952 return false;
1953
1954 // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
1955 // a single domain (either fixed or scalable). Any additional conversion
1956 // between fixed and scalable types is handled through integer types.
1957 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1958 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1959
 // Scale the scalable side by VScale so both sizes are fixed and can be
 // compared directly below.
1960 if (isa<ScalableVectorType>(NewTy)) {
1962 return false;
1963
1964 NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
1965 } else {
1967 return false;
1968
1969 OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
1970 }
1971 }
1972
1973 if (NewSize != OldSize)
1974 return false;
1975 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1976 return false;
1977
1978 // We can convert pointers to integers and vice-versa. Same for vectors
1979 // of pointers and integers.
1980 OldTy = OldTy->getScalarType();
1981 NewTy = NewTy->getScalarType();
1982 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1983 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1984 unsigned OldAS = OldTy->getPointerAddressSpace();
1985 unsigned NewAS = NewTy->getPointerAddressSpace();
1986 // Convert pointers if they are pointers from the same address space or
1987 // different integral (not non-integral) address spaces with the same
1988 // pointer size.
1989 return OldAS == NewAS ||
1990 (!DL.isNonIntegralAddressSpace(OldAS) &&
1991 !DL.isNonIntegralAddressSpace(NewAS) &&
1992 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1993 }
1994
1995 // We can convert integers to integral pointers, but not to non-integral
1996 // pointers.
1997 if (OldTy->isIntegerTy())
1998 return !DL.isNonIntegralPointerType(NewTy);
1999
2000 // We can convert integral pointers to integers, but non-integral pointers
2001 // need to remain pointers.
2002 if (!DL.isNonIntegralPointerType(OldTy))
2003 return NewTy->isIntegerTy();
2004
2005 return false;
2006 }
2007
 // Target extension types are never converted.
2008 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2009 return false;
2010
2011 return true;
2012}
2013
2014/// Test whether the given slice use can be promoted to a vector.
2015///
2016/// This function is called to test each entry in a partition which is slated
2017/// for a single slice.
2018static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2019 VectorType *Ty,
2020 uint64_t ElementSize,
2021 const DataLayout &DL,
2022 unsigned VScale) {
2023 // First validate the slice offsets.
2024 uint64_t BeginOffset =
2025 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2026 uint64_t BeginIndex = BeginOffset / ElementSize;
2027 if (BeginIndex * ElementSize != BeginOffset ||
2028 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2029 return false;
2030 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2031 uint64_t EndIndex = EndOffset / ElementSize;
2032 if (EndIndex * ElementSize != EndOffset ||
2033 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2034 return false;
2035
2036 assert(EndIndex > BeginIndex && "Empty vector!");
2037 uint64_t NumElements = EndIndex - BeginIndex;
2038 Type *SliceTy = (NumElements == 1)
2039 ? Ty->getElementType()
2040 : FixedVectorType::get(Ty->getElementType(), NumElements);
2041
2042 Type *SplitIntTy =
2043 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2044
2045 Use *U = S.getUse();
2046
2047 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2048 if (MI->isVolatile())
2049 return false;
2050 if (!S.isSplittable())
2051 return false; // Skip any unsplittable intrinsics.
2052 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2053 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2054 return false;
2055 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2056 if (LI->isVolatile())
2057 return false;
2058 Type *LTy = LI->getType();
2059 // Disable vector promotion when there are loads or stores of an FCA.
2060 if (LTy->isStructTy())
2061 return false;
2062 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2063 assert(LTy->isIntegerTy());
2064 LTy = SplitIntTy;
2065 }
2066 if (!canConvertValue(DL, SliceTy, LTy, VScale))
2067 return false;
2068 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2069 if (SI->isVolatile())
2070 return false;
2071 Type *STy = SI->getValueOperand()->getType();
2072 // Disable vector promotion when there are loads or stores of an FCA.
2073 if (STy->isStructTy())
2074 return false;
2075 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2076 assert(STy->isIntegerTy());
2077 STy = SplitIntTy;
2078 }
2079 if (!canConvertValue(DL, STy, SliceTy, VScale))
2080 return false;
2081 } else {
2082 return false;
2083 }
2084
2085 return true;
2086}
2087
2088/// Test whether any vector type in \p CandidateTys is viable for promotion.
2089///
2090/// This implements the necessary checking for \c isVectorPromotionViable over
2091/// all slices of the alloca for the given VectorType.
///
/// \p CandidateTys is modified in place (pruned, integer-ified, sorted and
/// de-duplicated as needed). The first remaining candidate for which every
/// slice and split slice tail of the partition passes
/// isVectorPromotionViableForSlice is returned; nullptr when there is none.
// NOTE(review): listing line 2093 (function name and the leading parameters,
// which evidently include the partition `P` and the data layout `DL`) is
// absent from this extract; confirm the signature against the complete file.
2092static VectorType *
2094 SmallVectorImpl<VectorType *> &CandidateTys,
2095 bool HaveCommonEltTy, Type *CommonEltTy,
2096 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2097 VectorType *CommonVecPtrTy, unsigned VScale) {
2098 // If we didn't find a vector type, nothing to do here.
2099 if (CandidateTys.empty())
2100 return nullptr;
2101
2102 // Pointer-ness is sticky, if we had a vector-of-pointers candidate type,
2103 // then we should choose it, not some other alternative.
2104 // But, we can't perform a no-op pointer address space change via bitcast,
2105 // so if we didn't have a common pointer element type, bail.
2106 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2107 return nullptr;
2108
2109 // Try to pick the "best" element type out of the choices.
2110 if (!HaveCommonEltTy && HaveVecPtrTy) {
2111 // If there was a pointer element type, there's really only one choice.
2112 CandidateTys.clear();
2113 CandidateTys.push_back(CommonVecPtrTy);
2114 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2115 // Integer-ify vector types.
2116 for (VectorType *&VTy : CandidateTys) {
2117 if (!VTy->getElementType()->isIntegerTy())
2118 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2119 VTy->getContext(), VTy->getScalarSizeInBits())));
2120 }
2121
2122 // Rank the remaining candidate vector types. This is easy because we know
2123 // they're all integer vectors. We sort by ascending number of elements.
2124 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2125 (void)DL;
2126 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2127 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2128 "Cannot have vector types of different sizes!");
2129 assert(RHSTy->getElementType()->isIntegerTy() &&
2130 "All non-integer types eliminated!");
2131 assert(LHSTy->getElementType()->isIntegerTy() &&
2132 "All non-integer types eliminated!");
2133 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2134 cast<FixedVectorType>(LHSTy)->getNumElements();
2135 };
 // Equality companion of the comparator above, used to drop candidates
 // with the same element count once the list is sorted.
2136 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2137 (void)DL;
2138 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2139 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2140 "Cannot have vector types of different sizes!");
2141 assert(RHSTy->getElementType()->isIntegerTy() &&
2142 "All non-integer types eliminated!");
2143 assert(LHSTy->getElementType()->isIntegerTy() &&
2144 "All non-integer types eliminated!");
2145 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2146 cast<FixedVectorType>(LHSTy)->getNumElements();
2147 };
2148 llvm::sort(CandidateTys, RankVectorTypesComp);
2149 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2150 CandidateTys.end());
2151 } else {
2152// The only way to have the same element type in every vector type is to
2153// have the same vector type. Check that and remove all but one.
2154#ifndef NDEBUG
2155 for (VectorType *VTy : CandidateTys) {
2156 assert(VTy->getElementType() == CommonEltTy &&
2157 "Unaccounted for element type!");
2158 assert(VTy == CandidateTys[0] &&
2159 "Different vector types with the same element type!");
2160 }
2161#endif
2162 CandidateTys.resize(1);
2163 }
2164
2165 // FIXME: hack. Do we have a named constant for this?
2166 // SDAG SDNode can't have more than 65535 operands.
2167 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2168 return cast<FixedVectorType>(VTy)->getNumElements() >
2169 std::numeric_limits<unsigned short>::max();
2170 });
2171
2172 // Find a vector type viable for promotion by iterating over all slices.
2173 auto *VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) -> bool {
2174 uint64_t ElementSize =
2175 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2176
2177 // While the definition of LLVM vectors is bitpacked, we don't support sizes
2178 // that aren't byte sized.
2179 if (ElementSize % 8)
2180 return false;
2181 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2182 "vector size not a multiple of element size?");
 // From here on ElementSize is in bytes, as the per-slice check expects.
2183 ElementSize /= 8;
2184
2185 for (const Slice &S : P)
2186 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2187 return false;
2188
2189 for (const Slice *S : P.splitSliceTails())
2190 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2191 return false;
2192
2193 return true;
2194 });
2195 return VTy != CandidateTys.end() ? *VTy : nullptr;
2196}
2197
// Derives additional candidate vector types from the types in \c OtherTys.
// For each such type and each vector in \c CandidateTysCopy, a new vector of
// the same total size with that type as its element is offered to
// \c CheckCandidateType, provided the type's size differs from both the
// vector size and its element size and divides the vector size evenly. The
// closing statement then forwards all the candidate state to a further call.
// NOTE(review): listing line 2198 (return type and function name), listing
// line 2209 (the condition guarding the `continue;`) and listing line 2228
// (the start of the final statement, presumably a call to
// checkVectorTypesForPromotion) are absent from this extract; confirm against
// the complete file.
2199 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2200 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2201 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2202 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2203 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
 // Remembered only so the assert in the inner loop can check that the copy
 // is not modified while CheckCandidateType runs.
2204 [[maybe_unused]] VectorType *OriginalElt =
2205 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2206 // Consider additional vector types where the element type size is a
2207 // multiple of load/store element size.
2208 for (Type *Ty : OtherTys) {
2210 continue;
2211 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2212 // Make a copy of CandidateTys and iterate through it, because we
2213 // might append to CandidateTys in the loop.
2214 for (VectorType *const VTy : CandidateTysCopy) {
2215 // The elements in the copy should remain invariant throughout the loop
2216 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2217 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2218 unsigned ElementSize =
2219 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2220 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2221 VectorSize % TypeSize == 0) {
2222 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2223 CheckCandidateType(NewVTy);
2224 }
2225 }
2226 }
2227
2229 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2230 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2231}
2232
2233/// Test whether the given alloca partitioning and range of slices can be
2234/// promoted to a vector.
2235///
2236/// This is a quick test to check whether we can rewrite a particular alloca
2237/// partition (and its newly formed alloca) into a vector alloca with only
2238/// whole-vector loads and stores such that it could be promoted to a vector
2239/// SSA value. We only can ensure this for a limited set of operations, and we
2240/// don't want to do the rewrites unless we are confident that the result will
2241/// be promotable, so we have an early test here.
// NOTE(review): listing line 2242 (return type, function name and the leading
// parameters, which evidently include the partition `P` and the data layout
// `DL`) and listing lines 2307 and 2314 (the starts of the two closing
// statements, presumably calls to createAndCheckVectorTypesForPromotion) are
// absent from this extract; confirm against the complete file.
2243 unsigned VScale) {
2244 // Collect the candidate types for vector-based promotion. Also track whether
2245 // we have different element types.
2246 SmallVector<VectorType *, 4> CandidateTys;
2247 SetVector<Type *> LoadStoreTys;
2248 SetVector<Type *> DeferredTys;
2249 Type *CommonEltTy = nullptr;
2250 VectorType *CommonVecPtrTy = nullptr;
2251 bool HaveVecPtrTy = false;
2252 bool HaveCommonEltTy = true;
2253 bool HaveCommonVecPtrTy = true;
 // Registers a fixed vector type as a candidate and updates the "common
 // element type" / "vector of pointers" bookkeeping above. Types that are
 // not fixed vectors are ignored.
2254 auto CheckCandidateType = [&](Type *Ty) {
2255 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2256 // Return if bitcast to vectors is different for total size in bits.
2257 if (!CandidateTys.empty()) {
2258 VectorType *V = CandidateTys[0];
2259 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2260 DL.getTypeSizeInBits(V).getFixedValue()) {
2261 CandidateTys.clear();
2262 return;
2263 }
2264 }
2265 CandidateTys.push_back(VTy);
2266 Type *EltTy = VTy->getElementType();
2267
2268 if (!CommonEltTy)
2269 CommonEltTy = EltTy;
2270 else if (CommonEltTy != EltTy)
2271 HaveCommonEltTy = false;
2272
2273 if (EltTy->isPointerTy()) {
2274 HaveVecPtrTy = true;
2275 if (!CommonVecPtrTy)
2276 CommonVecPtrTy = VTy;
2277 else if (CommonVecPtrTy != VTy)
2278 HaveCommonVecPtrTy = false;
2279 }
2280 }
2281 };
2282
2283 // Put load and store types into a set for de-duplication.
2284 for (const Slice &S : P) {
2285 Type *Ty;
2286 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2287 Ty = LI->getType();
2288 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2289 Ty = SI->getValueOperand()->getType();
2290 else
2291 continue;
2292
 // Pointer-typed accesses that do not cover the partition exactly are set
 // aside in DeferredTys; they are only used by the second attempt below.
2293 auto CandTy = Ty->getScalarType();
2294 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2295 S.endOffset() != P.endOffset())) {
2296 DeferredTys.insert(Ty);
2297 continue;
2298 }
2299
2300 LoadStoreTys.insert(Ty);
2301 // Consider any loads or stores that are the exact size of the slice.
2302 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2303 CheckCandidateType(Ty);
2304 }
2305
 // First attempt uses LoadStoreTys; if it yields no type, the candidates are
 // reset and a second attempt is made with DeferredTys.
2306 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2308 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2309 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2310 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2311 return VTy;
2312
2313 CandidateTys.clear();
2315 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2316 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2317 CommonVecPtrTy, VScale);
2318}
2319
2320/// Test whether a slice of an alloca is valid for integer widening.
2321///
2322/// This implements the necessary checking for the \c isIntegerWideningViable
2323/// test below on a single slice of the alloca.
2324static bool isIntegerWideningViableForSlice(const Slice &S,
2325 uint64_t AllocBeginOffset,
2326 Type *AllocaTy,
2327 const DataLayout &DL,
2328 bool &WholeAllocaOp) {
2329 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2330
2331 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2332 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2333
2334 Use *U = S.getUse();
2335
2336 // Lifetime intrinsics operate over the whole alloca whose sizes are usually
2337 // larger than other load/store slices (RelEnd > Size). But lifetime are
2338 // always promotable and should not impact other slices' promotability of the
2339 // partition.
2340 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2341 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2342 return true;
2343 }
2344
2345 // We can't reasonably handle cases where the load or store extends past
2346 // the end of the alloca's type and into its padding.
2347 if (RelEnd > Size)
2348 return false;
2349
2350 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2351 if (LI->isVolatile())
2352 return false;
2353 // We can't handle loads that extend past the allocated memory.
2354 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
2355 if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
2356 return false;
2357 // So far, AllocaSliceRewriter does not support widening split slice tails
2358 // in rewriteIntegerLoad.
2359 if (S.beginOffset() < AllocBeginOffset)
2360 return false;
2361 // Note that we don't count vector loads or stores as whole-alloca
2362 // operations which enable integer widening because we would prefer to use
2363 // vector widening instead.
2364 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2365 WholeAllocaOp = true;
2366 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2367 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2368 return false;
2369 } else if (RelBegin != 0 || RelEnd != Size ||
2370 !canConvertValue(DL, AllocaTy, LI->getType())) {
2371 // Non-integer loads need to be convertible from the alloca type so that
2372 // they are promotable.
2373 return false;
2374 }
2375 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2376 Type *ValueTy = SI->getValueOperand()->getType();
2377 if (SI->isVolatile())
2378 return false;
2379 // We can't handle stores that extend past the allocated memory.
2380 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2381 if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
2382 return false;
2383 // So far, AllocaSliceRewriter does not support widening split slice tails
2384 // in rewriteIntegerStore.
2385 if (S.beginOffset() < AllocBeginOffset)
2386 return false;
2387 // Note that we don't count vector loads or stores as whole-alloca
2388 // operations which enable integer widening because we would prefer to use
2389 // vector widening instead.
2390 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2391 WholeAllocaOp = true;
2392 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2393 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2394 return false;
2395 } else if (RelBegin != 0 || RelEnd != Size ||
2396 !canConvertValue(DL, ValueTy, AllocaTy)) {
2397 // Non-integer stores need to be convertible to the alloca type so that
2398 // they are promotable.
2399 return false;
2400 }
2401 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2402 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2403 return false;
2404 if (!S.isSplittable())
2405 return false; // Skip any unsplittable intrinsics.
2406 } else {
2407 return false;
2408 }
2409
2410 return true;
2411}
2412
2413/// Test whether the given alloca partition's integer operations can be
2414/// widened to promotable ones.
2415///
2416/// This is a quick test to check whether we can rewrite the integer loads and
2417/// stores to a particular alloca into wider loads and stores and be able to
2418/// promote the resulting alloca.
2419static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2420 const DataLayout &DL) {
2421 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2422 // Don't create integer types larger than the maximum bitwidth.
2423 if (SizeInBits > IntegerType::MAX_INT_BITS)
2424 return false;
2425
2426 // Don't try to handle allocas with bit-padding.
2427 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2428 return false;
2429
2430 // We need to ensure that an integer type with the appropriate bitwidth can
2431 // be converted to the alloca type, whatever that is. We don't want to force
2432 // the alloca itself to have an integer type if there is a more suitable one.
2433 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2434 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2435 !canConvertValue(DL, IntTy, AllocaTy))
2436 return false;
2437
2438 // While examining uses, we ensure that the alloca has a covering load or
2439 // store. We don't want to widen the integer operations only to fail to
2440 // promote due to some other unsplittable entry (which we may make splittable
2441 // later). However, if there are only splittable uses, go ahead and assume
2442 // that we cover the alloca.
2443 // FIXME: We shouldn't consider split slices that happen to start in the
2444 // partition here...
2445 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2446
2447 for (const Slice &S : P)
2448 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2449 WholeAllocaOp))
2450 return false;
2451
2452 for (const Slice *S : P.splitSliceTails())
2453 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2454 WholeAllocaOp))
2455 return false;
2456
2457 return WholeAllocaOp;
2458}
2459
// Extracts an integer of type \c Ty from the wider integer value \c V at byte
// offset \c Offset: \c V is shifted right so that the wanted bytes become the
// low bits and is then truncated to \c Ty. The shift amount accounts for the
// target's endianness.
// NOTE(review): listing line 2461 (the parameters between `V` and `Name`,
// which evidently declare `Ty` and `Offset`) is absent from this extract;
// confirm their types against the complete file.
2460static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2462 const Twine &Name) {
2463 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2464 IntegerType *IntTy = cast<IntegerType>(V->getType());
2465 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2466 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2467 "Element extends past full value");
 // Little-endian: the element starts Offset bytes above the low end. On
 // big-endian targets the distance is measured from the other end instead.
2468 uint64_t ShAmt = 8 * Offset;
2469 if (DL.isBigEndian())
2470 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2471 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2472 if (ShAmt) {
2473 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2474 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2475 }
2476 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2477 "Cannot extract to a larger integer!");
2478 if (Ty != IntTy) {
2479 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2480 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2481 }
2482 return V;
2483}
2484
2485static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2486 Value *V, uint64_t Offset, const Twine &Name) {
2487 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2488 IntegerType *Ty = cast<IntegerType>(V->getType());
2489 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2490 "Cannot insert a larger integer!");
2491 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2492 if (Ty != IntTy) {
2493 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2494 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2495 }
2496 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2497 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2498 "Element store outside of alloca store");
2499 uint64_t ShAmt = 8 * Offset;
2500 if (DL.isBigEndian())
2501 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2502 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2503 if (ShAmt) {
2504 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2505 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2506 }
2507
2508 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2509 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2510 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2511 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2512 V = IRB.CreateOr(Old, V, Name + ".insert");
2513 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2514 }
2515 return V;
2516}
2517
2518static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2519 unsigned EndIndex, const Twine &Name) {
2520 auto *VecTy = cast<FixedVectorType>(V->getType());
2521 unsigned NumElements = EndIndex - BeginIndex;
2522 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2523
2524 if (NumElements == VecTy->getNumElements())
2525 return V;
2526
2527 if (NumElements == 1) {
2528 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2529 Name + ".extract");
2530 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2531 return V;
2532 }
2533
2534 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2535 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2536 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2537 return V;
2538}
2539
2540static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2541 unsigned BeginIndex, const Twine &Name) {
2542 VectorType *VecTy = cast<VectorType>(Old->getType());
2543 assert(VecTy && "Can only insert a vector into a vector");
2544
2545 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2546 if (!Ty) {
2547 // Single element to insert.
2548 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2549 Name + ".insert");
2550 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2551 return V;
2552 }
2553
2554 unsigned NumSubElements = cast<FixedVectorType>(Ty)->getNumElements();
2555 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
2556
2557 assert(NumSubElements <= NumElements && "Too many elements!");
2558 if (NumSubElements == NumElements) {
2559 assert(V->getType() == VecTy && "Vector type mismatch");
2560 return V;
2561 }
2562 unsigned EndIndex = BeginIndex + NumSubElements;
2563
2564 // When inserting a smaller vector into the larger to store, we first
2565 // use a shuffle vector to widen it with undef elements, and then
2566 // a second shuffle vector to select between the loaded vector and the
2567 // incoming vector.
2569 Mask.reserve(NumElements);
2570 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2571 if (Idx >= BeginIndex && Idx < EndIndex)
2572 Mask.push_back(Idx - BeginIndex);
2573 else
2574 Mask.push_back(-1);
2575 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2576 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2577
2578 Mask.clear();
2579 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2580 if (Idx >= BeginIndex && Idx < EndIndex)
2581 Mask.push_back(Idx);
2582 else
2583 Mask.push_back(Idx + NumElements);
2584 V = IRB.CreateShuffleVector(V, Old, Mask, Name + "blend");
2585 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2586 return V;
2587}
2588
2589/// This function takes two vector values and combines them into a single vector
2590/// by concatenating their elements. The function handles:
2591///
2592/// 1. Element type mismatch: If either vector's element type differs from
2593/// NewAIEltType, the function bitcasts the vector to use NewAIEltType while
2594/// preserving the total bit width (adjusting the number of elements
2595/// accordingly).
2596///
2597/// 2. Size mismatch: After transforming the vectors to have the desired element
2598/// type, if the two vectors have different numbers of elements, the smaller
2599/// vector is extended with poison values to match the size of the larger
2600/// vector before concatenation.
2601///
2602/// 3. Concatenation: The vectors are merged using a shuffle operation that
2603/// places all elements of V0 first, followed by all elements of V1.
2604///
2605/// \param V0 The first vector to merge (must be a vector type)
2606/// \param V1 The second vector to merge (must be a vector type)
2607/// \param DL The data layout for size calculations
2608/// \param NewAIEltTy The desired element type for the result vector
2609/// \param Builder IRBuilder for creating new instructions
2610/// \return A new vector containing all elements from V0 followed by all
2611/// elements from V1
2613 Type *NewAIEltTy, IRBuilder<> &Builder) {
2614 // V0 and V1 are vectors
2615 // Create a new vector type with combined elements
2616 // Use ShuffleVector to concatenate the vectors
2617 auto *VecType0 = cast<FixedVectorType>(V0->getType());
2618 auto *VecType1 = cast<FixedVectorType>(V1->getType());
2619
2620 // If V0/V1 element types are different from NewAllocaElementType,
2621 // we need to introduce bitcasts before merging them
2622 auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
2623 const char *DebugName) {
2624 Type *EltType = VecType->getElementType();
2625 if (EltType != NewAIEltTy) {
2626 // Calculate new number of elements to maintain same bit width
2627 unsigned TotalBits =
2628 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2629 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2630
2631 auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
2632 V = Builder.CreateBitCast(V, NewVecType);
2633 VecType = NewVecType;
2634 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2635 }
2636 };
2637
2638 BitcastIfNeeded(V0, VecType0, "V0");
2639 BitcastIfNeeded(V1, VecType1, "V1");
2640
2641 unsigned NumElts0 = VecType0->getNumElements();
2642 unsigned NumElts1 = VecType1->getNumElements();
2643
2644 SmallVector<int, 16> ShuffleMask;
2645
2646 if (NumElts0 == NumElts1) {
2647 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2648 ShuffleMask.push_back(i);
2649 } else {
2650 // If two vectors have different sizes, we need to extend
2651 // the smaller vector to the size of the larger vector.
2652 unsigned SmallSize = std::min(NumElts0, NumElts1);
2653 unsigned LargeSize = std::max(NumElts0, NumElts1);
2654 bool IsV0Smaller = NumElts0 < NumElts1;
2655 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2656 SmallVector<int, 16> ExtendMask;
2657 for (unsigned i = 0; i < SmallSize; ++i)
2658 ExtendMask.push_back(i);
2659 for (unsigned i = SmallSize; i < LargeSize; ++i)
2660 ExtendMask.push_back(PoisonMaskElem);
2661 ExtendedVec = Builder.CreateShuffleVector(
2662 ExtendedVec, PoisonValue::get(ExtendedVec->getType()), ExtendMask);
2663 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2664 for (unsigned i = 0; i < NumElts0; ++i)
2665 ShuffleMask.push_back(i);
2666 for (unsigned i = 0; i < NumElts1; ++i)
2667 ShuffleMask.push_back(LargeSize + i);
2668 }
2669
2670 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2671}
2672
2673namespace {
2674
/// Visitor to rewrite instructions using a particular slice of an alloca
2676/// to use a new alloca.
2677///
2678/// Also implements the rewriting to vector-based accesses when the partition
2679/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2680/// lives here.
2681class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2682 // Befriend the base class so it can delegate to private visit methods.
2683 friend class InstVisitor<AllocaSliceRewriter, bool>;
2684
2685 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2686
2687 const DataLayout &DL;
2688 AllocaSlices &AS;
2689 SROA &Pass;
2690 AllocaInst &OldAI, &NewAI;
2691 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2692 Type *NewAllocaTy;
2693
2694 // This is a convenience and flag variable that will be null unless the new
2695 // alloca's integer operations should be widened to this integer type due to
2696 // passing isIntegerWideningViable above. If it is non-null, the desired
2697 // integer type will be stored here for easy access during rewriting.
2698 IntegerType *IntTy;
2699
2700 // If we are rewriting an alloca partition which can be written as pure
2701 // vector operations, we stash extra information here. When VecTy is
2702 // non-null, we have some strict guarantees about the rewritten alloca:
2703 // - The new alloca is exactly the size of the vector type here.
2704 // - The accesses all either map to the entire vector or to a single
2705 // element.
2706 // - The set of accessing instructions is only one of those handled above
2707 // in isVectorPromotionViable. Generally these are the same access kinds
2708 // which are promotable via mem2reg.
2709 VectorType *VecTy;
2710 Type *ElementTy;
2711 uint64_t ElementSize;
2712
2713 // The original offset of the slice currently being rewritten relative to
2714 // the original alloca.
2715 uint64_t BeginOffset = 0;
2716 uint64_t EndOffset = 0;
2717
2718 // The new offsets of the slice currently being rewritten relative to the
2719 // original alloca.
2720 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2721
2722 uint64_t SliceSize = 0;
2723 bool IsSplittable = false;
2724 bool IsSplit = false;
2725 Use *OldUse = nullptr;
2726 Instruction *OldPtr = nullptr;
2727
2728 // Track post-rewrite users which are PHI nodes and Selects.
2729 SmallSetVector<PHINode *, 8> &PHIUsers;
2730 SmallSetVector<SelectInst *, 8> &SelectUsers;
2731
2732 // Utility IR builder, whose name prefix is setup for each visited use, and
2733 // the insertion point is set to point to the user.
2734 IRBuilderTy IRB;
2735
2736 // Return the new alloca, addrspacecasted if required to avoid changing the
2737 // addrspace of a volatile access.
2738 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2739 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2740 return &NewAI;
2741
2742 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2743 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2744 }
2745
2746public:
2747 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2748 AllocaInst &OldAI, AllocaInst &NewAI, Type *NewAllocaTy,
2749 uint64_t NewAllocaBeginOffset,
2750 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2751 VectorType *PromotableVecTy,
2752 SmallSetVector<PHINode *, 8> &PHIUsers,
2753 SmallSetVector<SelectInst *, 8> &SelectUsers)
2754 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2755 NewAllocaBeginOffset(NewAllocaBeginOffset),
2756 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2757 IntTy(IsIntegerPromotable
2758 ? Type::getIntNTy(
2759 NewAI.getContext(),
2760 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2761 : nullptr),
2762 VecTy(PromotableVecTy),
2763 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2764 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2765 : 0),
2766 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2767 IRB(NewAI.getContext(), ConstantFolder()) {
2768 if (VecTy) {
2769 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2770 "Only multiple-of-8 sized vector elements are viable");
2771 ++NumVectorized;
2772 }
2773 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2774 }
2775
2776 bool visit(AllocaSlices::const_iterator I) {
2777 bool CanSROA = true;
2778 BeginOffset = I->beginOffset();
2779 EndOffset = I->endOffset();
2780 IsSplittable = I->isSplittable();
2781 IsSplit =
2782 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2783 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2784 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2785 LLVM_DEBUG(dbgs() << "\n");
2786
2787 // Compute the intersecting offset range.
2788 assert(BeginOffset < NewAllocaEndOffset);
2789 assert(EndOffset > NewAllocaBeginOffset);
2790 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2791 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2792
2793 SliceSize = NewEndOffset - NewBeginOffset;
2794 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2795 << ") NewBegin:(" << NewBeginOffset << ", "
2796 << NewEndOffset << ") NewAllocaBegin:("
2797 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2798 << ")\n");
2799 assert(IsSplit || NewBeginOffset == BeginOffset);
2800 OldUse = I->getUse();
2801 OldPtr = cast<Instruction>(OldUse->get());
2802
2803 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2804 IRB.SetInsertPoint(OldUserI);
2805 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2806 // Avoid materializing the name prefix when it is discarded anyway.
2807 if (!IRB.getContext().shouldDiscardValueNames())
2808 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2809 Twine(BeginOffset) + ".");
2810
2811 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2812 if (VecTy || IntTy)
2813 assert(CanSROA);
2814 return CanSROA;
2815 }
2816
2817 /// Attempts to rewrite a partition using tree-structured merge optimization.
2818 ///
2819 /// This function analyzes a partition to determine if it can be optimized
2820 /// using a tree-structured merge pattern, where multiple non-overlapping
2821 /// stores completely fill an alloca. And there is no load from the alloca in
2822 /// the middle of the stores. Such patterns can be optimized by eliminating
2823 /// the intermediate stores and directly constructing the final vector by
2824 /// using shufflevectors.
2825 ///
2826 /// Example transformation:
2827 /// Before: (stores do not have to be in order)
2828 /// %alloca = alloca <8 x float>
2829 /// store <2 x float> %val0, ptr %alloca ; offset 0-1
2830 /// store <2 x float> %val2, ptr %alloca+16 ; offset 4-5
2831 /// store <2 x float> %val1, ptr %alloca+8 ; offset 2-3
2832 /// store <2 x float> %val3, ptr %alloca+24 ; offset 6-7
2833 ///
2834 /// After:
2835 /// %alloca = alloca <8 x float>
2836 /// %shuffle0 = shufflevector %val0, %val1, <4 x i32> <i32 0, i32 1, i32 2,
2837 /// i32 3>
2838 /// %shuffle1 = shufflevector %val2, %val3, <4 x i32> <i32 0, i32 1, i32 2,
2839 /// i32 3>
2840 /// %shuffle2 = shufflevector %shuffle0, %shuffle1, <8 x i32> <i32 0, i32 1,
2841 /// i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2842 /// store %shuffle2, ptr %alloca
2843 ///
2844 /// The optimization looks for partitions that:
2845 /// 1. Have no overlapping split slice tails
2846 /// 2. Contain non-overlapping stores that cover the entire alloca
2847 /// 3. Have exactly one load that reads the complete alloca structure and not
2848 /// in the middle of the stores (TODO: maybe we can relax the constraint
2849 /// about reading the entire alloca structure)
2850 ///
2851 /// \param P The partition to analyze and potentially rewrite
2852 /// \return An optional vector of values that were deleted during the rewrite
2853 /// process, or std::nullopt if the partition cannot be optimized
2854 /// using tree-structured merge
2855 std::optional<SmallVector<Value *, 4>>
2856 rewriteTreeStructuredMerge(Partition &P) {
2857 // No tail slices that overlap with the partition
2858 if (P.splitSliceTails().size() > 0)
2859 return std::nullopt;
2860
2861 SmallVector<Value *, 4> DeletedValues;
2862 LoadInst *TheLoad = nullptr;
2863
2864 // Structure to hold store information
2865 struct StoreInfo {
2866 StoreInst *Store;
2867 uint64_t BeginOffset;
2868 uint64_t EndOffset;
2869 Value *StoredValue;
2870 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
2871 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2872 };
2873
2874 SmallVector<StoreInfo, 4> StoreInfos;
2875
2876 // If the new alloca is a fixed vector type, we use its element type as the
2877 // allocated element type, otherwise we use i8 as the allocated element
2878 Type *AllocatedEltTy =
2879 isa<FixedVectorType>(NewAllocaTy)
2880 ? cast<FixedVectorType>(NewAllocaTy)->getElementType()
2881 : Type::getInt8Ty(NewAI.getContext());
2882 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
2883
2884 // Helper to check if a type is
2885 // 1. A fixed vector type
2886 // 2. The element type is not a pointer
2887 // 3. The element type size is byte-aligned
2888 // We only handle the cases that the ld/st meet these conditions
2889 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
2890 auto *FixedVecTy = dyn_cast<FixedVectorType>(Ty);
2891 return FixedVecTy &&
2892 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2893 !FixedVecTy->getElementType()->isPointerTy();
2894 };
2895
2896 for (Slice &S : P) {
2897 auto *User = cast<Instruction>(S.getUse()->getUser());
2898 if (auto *LI = dyn_cast<LoadInst>(User)) {
2899 // Do not handle the case if
2900 // 1. There is more than one load
2901 // 2. The load is volatile
2902 // 3. The load does not read the entire alloca structure
2903 // 4. The load does not meet the conditions in the helper function
2904 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
2905 S.beginOffset() != NewAllocaBeginOffset ||
2906 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
2907 return std::nullopt;
2908 TheLoad = LI;
2909 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
2910 // Do not handle the case if
2911 // 1. The store does not meet the conditions in the helper function
2912 // 2. The store is volatile
2913 // 3. The total store size is not a multiple of the allocated element
2914 // type size
2915 if (!IsTypeValidForTreeStructuredMerge(
2916 SI->getValueOperand()->getType()) ||
2917 SI->isVolatile())
2918 return std::nullopt;
2919 auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
2920 unsigned NumElts = VecTy->getNumElements();
2921 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
2922 if (NumElts * EltSize % AllocatedEltTySize != 0)
2923 return std::nullopt;
2924 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
2925 SI->getValueOperand());
2926 } else {
2927 // If we have instructions other than load and store, we cannot do the
2928 // tree structured merge
2929 return std::nullopt;
2930 }
2931 }
2932 // If we do not have any load, we cannot do the tree structured merge
2933 if (!TheLoad)
2934 return std::nullopt;
2935
2936 // If we do not have multiple stores, we cannot do the tree structured merge
2937 if (StoreInfos.size() < 2)
2938 return std::nullopt;
2939
2940 // Stores should not overlap and should cover the whole alloca
2941 // Sort by begin offset
2942 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
2943 return A.BeginOffset < B.BeginOffset;
2944 });
2945
2946 // Check for overlaps and coverage
2947 uint64_t ExpectedStart = NewAllocaBeginOffset;
2948 for (auto &StoreInfo : StoreInfos) {
2949 uint64_t BeginOff = StoreInfo.BeginOffset;
2950 uint64_t EndOff = StoreInfo.EndOffset;
2951
2952 // Check for gap or overlap
2953 if (BeginOff != ExpectedStart)
2954 return std::nullopt;
2955
2956 ExpectedStart = EndOff;
2957 }
2958 // Check that stores cover the entire alloca
2959 if (ExpectedStart != NewAllocaEndOffset)
2960 return std::nullopt;
2961
2962 // Stores should be in the same basic block
2963 // The load should not be in the middle of the stores
2964 // Note:
2965 // If the load is in a different basic block with the stores, we can still
2966 // do the tree structured merge. This is because we do not have the
2967 // store->load forwarding here. The merged vector will be stored back to
2968 // NewAI and the new load will load from NewAI. The forwarding will be
2969 // handled later when we try to promote NewAI.
2970 BasicBlock *LoadBB = TheLoad->getParent();
2971 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
2972
2973 for (auto &StoreInfo : StoreInfos) {
2974 if (StoreInfo.Store->getParent() != StoreBB)
2975 return std::nullopt;
2976 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
2977 return std::nullopt;
2978 }
2979
2980 // If we reach here, the partition can be merged with a tree structured
2981 // merge
2982 LLVM_DEBUG({
2983 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
2984 << "\n Ordered stores:\n";
2985 for (auto [i, Info] : enumerate(StoreInfos))
2986 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
2987 << Info.EndOffset << ") \tStore: " << *Info.Store
2988 << "\tValue: " << *Info.StoredValue << "\n";
2989 });
2990
2991 // Instead of having these stores, we merge all the stored values into a
2992 // vector and store the merged value into the alloca
2993 std::queue<Value *> VecElements;
2994 // StoreInfos is sorted by offset, not by block order. Anchoring to
2995 // StoreInfos.back().Store (last by offset) can place shuffles before
2996 // operands that appear later in the block (invalid SSA). Insert before
2997 // TheLoad when it shares the store block (after all stores, before any
2998 // later IR in that block). Otherwise insert before the store block's
2999 // terminator so the merge runs after every store and any trailing
3000 // instructions in that block.
3001 IRBuilder<> Builder(LoadBB == StoreBB ? TheLoad : StoreBB->getTerminator());
3002 for (const auto &Info : StoreInfos) {
3003 DeletedValues.push_back(Info.Store);
3004 VecElements.push(Info.StoredValue);
3005 }
3006
3007 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
3008 while (VecElements.size() > 1) {
3009 const auto NumElts = VecElements.size();
3010 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3011 Value *V0 = VecElements.front();
3012 VecElements.pop();
3013 Value *V1 = VecElements.front();
3014 VecElements.pop();
3015 Value *Merged = mergeTwoVectors(V0, V1, DL, AllocatedEltTy, Builder);
3016 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3017 VecElements.push(Merged);
3018 }
3019 if (NumElts % 2 == 1) {
3020 Value *V = VecElements.front();
3021 VecElements.pop();
3022 VecElements.push(V);
3023 }
3024 }
3025
3026 // Store the merged value into the alloca
3027 Value *MergedValue = VecElements.front();
3028 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3029
3030 IRBuilder<> LoadBuilder(TheLoad);
3031 TheLoad->replaceAllUsesWith(LoadBuilder.CreateAlignedLoad(
3032 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3033 TheLoad->getName() + ".sroa.new.load"));
3034 DeletedValues.push_back(TheLoad);
3035
3036 return DeletedValues;
3037 }
3038
3039private:
3040 // Make sure the other visit overloads are visible.
3041 using Base::visit;
3042
3043 // Every instruction which can end up as a user must have a rewrite rule.
3044 bool visitInstruction(Instruction &I) {
3045 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3046 llvm_unreachable("No rewrite rule for this instruction!");
3047 }
3048
3049 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3050 // Note that the offset computation can use BeginOffset or NewBeginOffset
3051 // interchangeably for unsplit slices.
3052 assert(IsSplit || BeginOffset == NewBeginOffset);
3053 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3054
3055 StringRef OldName = OldPtr->getName();
3056 // Skip through the last '.sroa.' component of the name.
3057 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3058 if (LastSROAPrefix != StringRef::npos) {
3059 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3060 // Look for an SROA slice index.
3061 size_t IndexEnd = OldName.find_first_not_of("0123456789");
3062 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3063 // Strip the index and look for the offset.
3064 OldName = OldName.substr(IndexEnd + 1);
3065 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3066 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3067 // Strip the offset.
3068 OldName = OldName.substr(OffsetEnd + 1);
3069 }
3070 }
3071 // Strip any SROA suffixes as well.
3072 OldName = OldName.substr(0, OldName.find(".sroa_"));
3073
3074 return getAdjustedPtr(IRB, DL, &NewAI,
3075 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
3076 PointerTy, Twine(OldName) + ".");
3077 }
3078
3079 /// Compute suitable alignment to access this slice of the *new*
3080 /// alloca.
3081 ///
3082 /// You can optionally pass a type to this routine and if that type's ABI
3083 /// alignment is itself suitable, this will return zero.
3084 Align getSliceAlign() {
3085 return commonAlignment(NewAI.getAlign(),
3086 NewBeginOffset - NewAllocaBeginOffset);
3087 }
3088
3089 unsigned getIndex(uint64_t Offset) {
3090 assert(VecTy && "Can only call getIndex when rewriting a vector");
3091 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3092 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3093 uint32_t Index = RelOffset / ElementSize;
3094 assert(Index * ElementSize == RelOffset);
3095 return Index;
3096 }
3097
3098 void deleteIfTriviallyDead(Value *V) {
3101 Pass.DeadInsts.push_back(I);
3102 }
3103
3104 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3105 unsigned BeginIndex = getIndex(NewBeginOffset);
3106 unsigned EndIndex = getIndex(NewEndOffset);
3107 assert(EndIndex > BeginIndex && "Empty vector!");
3108
3109 LoadInst *Load =
3110 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3111
3112 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3113 LLVMContext::MD_access_group});
3114 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3115 }
3116
3117 Value *rewriteIntegerLoad(LoadInst &LI) {
3118 assert(IntTy && "We cannot insert an integer to the alloca");
3119 assert(!LI.isVolatile());
3120 Value *V =
3121 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3122 V = IRB.CreateBitPreservingCastChain(DL, V, IntTy);
3123 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3124 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3125 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3126 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3127 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
3128 }
3129 // It is possible that the extracted type is not the load type. This
3130 // happens if there is a load past the end of the alloca, and as
3131 // a consequence the slice is narrower but still a candidate for integer
3132 // lowering. To handle this case, we just zero extend the extracted
3133 // integer.
3134 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
3135 "Can only handle an extract for an overly wide load");
3136 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
3137 V = IRB.CreateZExt(V, LI.getType());
3138 return V;
3139 }
3140
3141 bool visitLoadInst(LoadInst &LI) {
3142 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3143 Value *OldOp = LI.getOperand(0);
3144 assert(OldOp == OldPtr);
3145
3146 AAMDNodes AATags = LI.getAAMetadata();
3147
3148 unsigned AS = LI.getPointerAddressSpace();
3149
3150 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3151 : LI.getType();
3152 bool IsPtrAdjusted = false;
3153 Value *V;
3154 if (VecTy) {
3155 V = rewriteVectorizedLoadInst(LI);
3156 } else if (IntTy && LI.getType()->isIntegerTy()) {
3157 V = rewriteIntegerLoad(LI);
3158 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3159 NewEndOffset == NewAllocaEndOffset &&
3160 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
3161 (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
3162 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3163 !LI.isVolatile()))) {
3164 Value *NewPtr =
3165 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3166 LoadInst *NewLI = IRB.CreateAlignedLoad(
3167 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3168 if (LI.isVolatile())
3169 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3170 if (NewLI->isAtomic())
3171 NewLI->setAlignment(LI.getAlign());
3172
3173 // Copy any metadata that is valid for the new load. This may require
3174 // conversion to a different kind of metadata, e.g. !nonnull might change
3175 // to !range or vice versa.
3176 copyMetadataForLoad(*NewLI, LI);
3177
3178 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
3179 if (AATags)
3180 NewLI->setAAMetadata(AATags.adjustForAccess(
3181 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3182
3183 // Try to preserve nonnull metadata
3184 V = NewLI;
3185
3186 // If this is an integer load past the end of the slice (which means the
3187 // bytes outside the slice are undef or this load is dead) just forcibly
3188 // fix the integer size with correct handling of endianness.
3189 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3190 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3191 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3192 V = IRB.CreateZExt(V, TITy, "load.ext");
3193 if (DL.isBigEndian())
3194 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3195 "endian_shift");
3196 }
3197 } else {
3198 Type *LTy = IRB.getPtrTy(AS);
3199 LoadInst *NewLI =
3200 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3201 getSliceAlign(), LI.isVolatile(), LI.getName());
3202
3203 if (AATags)
3204 NewLI->setAAMetadata(AATags.adjustForAccess(
3205 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3206
3207 if (LI.isVolatile())
3208 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3209 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3210 LLVMContext::MD_access_group});
3211
3212 V = NewLI;
3213 IsPtrAdjusted = true;
3214 }
3215 V = IRB.CreateBitPreservingCastChain(DL, V, TargetTy);
3216
3217 if (IsSplit) {
3218 assert(!LI.isVolatile());
3219 assert(LI.getType()->isIntegerTy() &&
3220 "Only integer type loads and stores are split");
3221 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3222 "Split load isn't smaller than original load");
3223 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
3224 "Non-byte-multiple bit width");
3225 // Move the insertion point just past the load so that we can refer to it.
3226 BasicBlock::iterator LIIt = std::next(LI.getIterator());
3227 // Ensure the insertion point comes before any debug-info immediately
3228 // after the load, so that variable values referring to the load are
3229 // dominated by it.
3230 LIIt.setHeadBit(true);
3231 IRB.SetInsertPoint(LI.getParent(), LIIt);
3232 // Create a placeholder value with the same type as LI to use as the
3233 // basis for the new value. This allows us to replace the uses of LI with
3234 // the computed value, and then replace the placeholder with LI, leaving
3235 // LI only used for this computation.
3236 Value *Placeholder =
3237 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
3238 false, Align(1));
3239 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3240 "insert");
3241 LI.replaceAllUsesWith(V);
3242 Placeholder->replaceAllUsesWith(&LI);
3243 Placeholder->deleteValue();
3244 } else {
3245 LI.replaceAllUsesWith(V);
3246 }
3247
3248 Pass.DeadInsts.push_back(&LI);
3249 deleteIfTriviallyDead(OldOp);
3250 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3251 return !LI.isVolatile() && !IsPtrAdjusted;
3252 }
3253
3254 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3255 AAMDNodes AATags) {
3256 // Capture V for the purpose of debug-info accounting once it's converted
3257 // to a vector store.
3258 Value *OrigV = V;
3259 if (V->getType() != VecTy) {
3260 unsigned BeginIndex = getIndex(NewBeginOffset);
3261 unsigned EndIndex = getIndex(NewEndOffset);
3262 assert(EndIndex > BeginIndex && "Empty vector!");
3263 unsigned NumElements = EndIndex - BeginIndex;
3264 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3265 "Too many elements!");
3266 Type *SliceTy = (NumElements == 1)
3267 ? ElementTy
3268 : FixedVectorType::get(ElementTy, NumElements);
3269 if (V->getType() != SliceTy)
3270 V = IRB.CreateBitPreservingCastChain(DL, V, SliceTy);
3271
3272 // Mix in the existing elements.
3273 Value *Old =
3274 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3275 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3276 }
3277 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3278 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3279 LLVMContext::MD_access_group});
3280 if (AATags)
3281 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3282 V->getType(), DL));
3283 Pass.DeadInsts.push_back(&SI);
3284
3285 // NOTE: Careful to use OrigV rather than V.
3286 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3287 Store, Store->getPointerOperand(), OrigV, DL);
3288 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3289 return true;
3290 }
3291
3292 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3293 assert(IntTy && "We cannot extract an integer from the alloca");
3294 assert(!SI.isVolatile());
3295 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3296 IntTy->getBitWidth()) {
3297 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3298 "oldload");
3299 Old = IRB.CreateBitPreservingCastChain(DL, Old, IntTy);
3300 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3301 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3302 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3303 }
3304 V = IRB.CreateBitPreservingCastChain(DL, V, NewAllocaTy);
3305 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3306 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3307 LLVMContext::MD_access_group});
3308 if (AATags)
3309 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3310 V->getType(), DL));
3311
3312 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3313 Store, Store->getPointerOperand(),
3314 Store->getValueOperand(), DL);
3315
3316 Pass.DeadInsts.push_back(&SI);
3317 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3318 return true;
3319 }
3320
3321 bool visitStoreInst(StoreInst &SI) {
3322 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3323 Value *OldOp = SI.getOperand(1);
3324 assert(OldOp == OldPtr);
3325
3326 AAMDNodes AATags = SI.getAAMetadata();
3327 Value *V = SI.getValueOperand();
3328
3329 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3330 // alloca that should be re-examined after promoting this alloca.
3331 if (V->getType()->isPointerTy())
3332 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3333 Pass.PostPromotionWorklist.insert(AI);
3334
3335 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3336 if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
3337 assert(!SI.isVolatile());
3338 assert(V->getType()->isIntegerTy() &&
3339 "Only integer type loads and stores are split");
3340 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3341 "Non-byte-multiple bit width");
3342 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3343 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3344 "extract");
3345 }
3346
3347 if (VecTy)
3348 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3349 if (IntTy && V->getType()->isIntegerTy())
3350 return rewriteIntegerStore(V, SI, AATags);
3351
3352 StoreInst *NewSI;
3353 if (NewBeginOffset == NewAllocaBeginOffset &&
3354 NewEndOffset == NewAllocaEndOffset &&
3355 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3356 V = IRB.CreateBitPreservingCastChain(DL, V, NewAllocaTy);
3357 Value *NewPtr =
3358 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3359
3360 NewSI =
3361 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3362 } else {
3363 unsigned AS = SI.getPointerAddressSpace();
3364 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3365 NewSI =
3366 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3367 }
3368 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3369 LLVMContext::MD_access_group});
3370 if (AATags)
3371 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3372 V->getType(), DL));
3373 if (SI.isVolatile())
3374 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3375 if (NewSI->isAtomic())
3376 NewSI->setAlignment(SI.getAlign());
3377
3378 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3379 NewSI, NewSI->getPointerOperand(),
3380 NewSI->getValueOperand(), DL);
3381
3382 Pass.DeadInsts.push_back(&SI);
3383 deleteIfTriviallyDead(OldOp);
3384
3385 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3386 return NewSI->getPointerOperand() == &NewAI &&
3387 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3388 !SI.isVolatile();
3389 }
3390
3391 /// Compute an integer value from splatting an i8 across the given
3392 /// number of bytes.
3393 ///
3394 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3395 /// call this routine.
3396 /// FIXME: Heed the advice above.
3397 ///
3398 /// \param V The i8 value to splat.
3399 /// \param Size The number of bytes in the output (assuming i8 is one byte)
3400 Value *getIntegerSplat(Value *V, unsigned Size) {
3401 assert(Size > 0 && "Expected a positive number of bytes.");
3402 IntegerType *VTy = cast<IntegerType>(V->getType());
3403 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3404 if (Size == 1)
3405 return V;
3406
3407 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3408 V = IRB.CreateMul(
3409 IRB.CreateZExt(V, SplatIntTy, "zext"),
3410 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3411 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3412 SplatIntTy)),
3413 "isplat");
3414 return V;
3415 }
3416
3417 /// Compute a vector splat for a given element value.
3418 Value *getVectorSplat(Value *V, unsigned NumElements) {
3419 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3420 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3421 return V;
3422 }
3423
/// Rewrite a memset whose destination is the pointer currently being
/// rewritten (asserted on entry).
///
/// Three outcomes are possible:
///  - Non-constant length: the call is kept; only its destination pointer and
///    destination alignment are updated. Returns false.
///  - Constant length that cannot be expressed as a single value of the new
///    alloca's type: the call is queued for deletion and replaced with a
///    memset covering [NewBeginOffset, NewEndOffset). Returns false.
///  - Otherwise: the call is queued for deletion and replaced with a store of
///    a splatted value to the new alloca. Returns true unless the memset is
///    volatile.
3424 bool visitMemSetInst(MemSetInst &II) {
3425 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3426 assert(II.getRawDest() == OldPtr);
3427
3428 AAMDNodes AATags = II.getAAMetadata();
3429
3430 // If the memset has a variable size, it cannot be split, just adjust the
3431 // pointer to the new alloca.
3432 if (!isa<ConstantInt>(II.getLength())) {
3433 assert(!IsSplit);
3434 assert(NewBeginOffset == BeginOffset);
3435 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3436 II.setDestAlignment(getSliceAlign());
3437 // In theory we should call migrateDebugInfo here. However, we do not
3438 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3439 // constant geps, or storing a variable number of bytes.
// NOTE(review): the message below is the tail of an assertion whose opening
// line is not present in this listing; confirm against the full source.
3441 "AT: Unexpected link to non-const GEP");
3442 deleteIfTriviallyDead(OldPtr);
3443 return false;
3444 }
3445
3446 // Record this instruction for deletion.
3447 Pass.DeadInsts.push_back(&II);
3448
3449 Type *ScalarTy = NewAllocaTy->getScalarType();
3450
// Decide whether the memset can be turned into a store of one value. With a
// vector or integer view of the alloca it always can. Without one, the memset
// must cover the whole new alloca, its length must fit in an unsigned, a
// vector of that many i8 must be convertible to the alloca type, and the
// scalar width must be a legal integer width for the target.
3451 const bool CanContinue = [&]() {
3452 if (VecTy || IntTy)
3453 return true;
3454 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3455 return false;
3456 // Length must be in range for FixedVectorType.
3457 auto *C = cast<ConstantInt>(II.getLength());
3458 const uint64_t Len = C->getLimitedValue();
3459 if (Len > std::numeric_limits<unsigned>::max())
3460 return false;
3461 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3462 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3463 return canConvertValue(DL, SrcTy, NewAllocaTy) &&
3464 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3465 }();
3466
3467 // If this doesn't map cleanly onto the alloca type, and that type isn't
3468 // a single value type, just emit a memset.
3469 if (!CanContinue) {
3470 Type *SizeTy = II.getLength()->getType();
3471 unsigned Sz = NewEndOffset - NewBeginOffset;
3472 Constant *Size = ConstantInt::get(SizeTy, Sz);
3473 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3474 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3475 MaybeAlign(getSliceAlign()), II.isVolatile()));
3476 if (AATags)
3477 New->setAAMetadata(
3478 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3479
3480 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3481 New, New->getRawDest(), nullptr, DL);
3482
3483 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3484 return false;
3485 }
3486
3487 // If we can represent this as a simple value, we have to build the actual
3488 // value to store, which requires expanding the byte present in memset to
3489 // a sensible representation for the alloca type. This is essentially
3490 // splatting the byte to a sufficiently wide integer, splatting it across
3491 // any desired vector width, and bitcasting to the final type.
3492 Value *V;
3493
3494 if (VecTy) {
3495 // If this is a memset of a vectorized alloca, insert it.
3496 assert(ElementTy == ScalarTy);
3497
3498 unsigned BeginIndex = getIndex(NewBeginOffset);
3499 unsigned EndIndex = getIndex(NewEndOffset);
3500 assert(EndIndex > BeginIndex && "Empty vector!");
3501 unsigned NumElements = EndIndex - BeginIndex;
3502 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3503 "Too many elements!");
3504
// Splat the byte across one element, cast it to the element type, then
// replicate it across the covered lanes and merge into the loaded vector.
3505 Value *Splat = getIntegerSplat(
3506 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3507 Splat = IRB.CreateBitPreservingCastChain(DL, Splat, ElementTy);
3508 if (NumElements > 1)
3509 Splat = getVectorSplat(Splat, NumElements);
3510
3511 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3512 "oldload");
3513 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3514 } else if (IntTy) {
3515 // If this is a memset on an alloca where we can widen stores, insert the
3516 // set integer.
3517 assert(!II.isVolatile());
3518
3519 uint64_t Size = NewEndOffset - NewBeginOffset;
3520 V = getIntegerSplat(II.getValue(), Size);
3521
// A partial overwrite has to be merged into the integer currently held by
// the alloca; a full overwrite must already have the alloca-wide type.
3522 if (IntTy && (NewBeginOffset != NewAllocaBeginOffset ||
3523 NewEndOffset != NewAllocaEndOffset)) {
3524 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3525 NewAI.getAlign(), "oldload");
3526 Old = IRB.CreateBitPreservingCastChain(DL, Old, IntTy);
3527 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3528 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3529 } else {
3530 assert(V->getType() == IntTy &&
3531 "Wrong type for an alloca wide integer!");
3532 }
3533 V = IRB.CreateBitPreservingCastChain(DL, V, NewAllocaTy);
3534 } else {
3535 // Established these invariants above.
3536 assert(NewBeginOffset == NewAllocaBeginOffset);
3537 assert(NewEndOffset == NewAllocaEndOffset);
3538
3539 V = getIntegerSplat(II.getValue(),
3540 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3541 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(NewAllocaTy))
3542 V = getVectorSplat(
3543 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3544
3545 V = IRB.CreateBitPreservingCastChain(DL, V, NewAllocaTy);
3546 }
3547
// Emit the replacement store of the whole alloca value and carry over the
// memset's metadata and debug info.
3548 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3549 StoreInst *New =
3550 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3551 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3552 LLVMContext::MD_access_group});
3553 if (AATags)
3554 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3555 V->getType(), DL));
3556
3557 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3558 New, New->getPointerOperand(), V, DL);
3559
3560 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3561 return !II.isVolatile();
3562 }
3563
/// Rewrite a memory transfer intrinsic that uses the old pointer as either
/// its destination or its source (IsDest below records which).
///
/// Outcomes:
///  - Slice is not splittable: the intrinsic is kept and only the pointer and
///    alignment on our side are updated. Returns false.
///  - A memcpy is wanted and the alloca is unchanged (OldAI is NewAI): only
///    the length is adjusted when the end offset moved. Returns false.
///  - A memcpy is wanted otherwise: the intrinsic is queued for deletion and
///    replaced with a memcpy of [NewBeginOffset, NewEndOffset). Returns false.
///  - Otherwise: the intrinsic is queued for deletion and replaced with a load
///    from the source side followed by a store to the destination side.
///    Returns true unless the intrinsic is volatile.
3564 bool visitMemTransferInst(MemTransferInst &II) {
3565 // Rewriting of memory transfer instructions can be a bit tricky. We break
3566 // them into two categories: split intrinsics and unsplit intrinsics.
3567
3568 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3569
3570 AAMDNodes AATags = II.getAAMetadata();
3571
3572 bool IsDest = &II.getRawDestUse() == OldUse;
3573 assert((IsDest && II.getRawDest() == OldPtr) ||
3574 (!IsDest && II.getRawSource() == OldPtr));
3575
3576 Align SliceAlign = getSliceAlign();
3577 // For unsplit intrinsics, we simply modify the source and destination
3578 // pointers in place. This isn't just an optimization, it is a matter of
3579 // correctness. With unsplit intrinsics we may be dealing with transfers
3580 // within a single alloca before SROA ran, or with transfers that have
3581 // a variable length. We may also be dealing with memmove instead of
3582 // memcpy, and so simply updating the pointers is necessary for us to
3583 // update both source and dest of a single call.
3584 if (!IsSplittable) {
3585 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3586 if (IsDest) {
3587 // Update the address component of linked dbg.assigns.
3588 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(&II)) {
3589 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3590 DbgAssign->getAddress() == II.getDest())
3591 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3592 }
3593 II.setDest(AdjustedPtr);
3594 II.setDestAlignment(SliceAlign);
3595 } else {
3596 II.setSource(AdjustedPtr);
3597 II.setSourceAlignment(SliceAlign);
3598 }
3599
3600 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3601 deleteIfTriviallyDead(OldPtr);
3602 return false;
3603 }
3604 // For split transfer intrinsics we have an incredibly useful assurance:
3605 // the source and destination do not reside within the same alloca, and at
3606 // least one of them does not escape. This means that we can replace
3607 // memmove with memcpy, and we don't need to worry about all manner of
3608 // downsides to splitting and transforming the operations.
3609
3610 // If this doesn't map cleanly onto the alloca type, and that type isn't
3611 // a single value type, just emit a memcpy.
3612 bool EmitMemCpy =
3613 !VecTy && !IntTy &&
3614 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3615 SliceSize != DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3616 !DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3617 !NewAllocaTy->isSingleValueType());
3618
3619 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3620 // size hasn't been shrunk based on analysis of the viable range, this is
3621 // a no-op.
3622 if (EmitMemCpy && &OldAI == &NewAI) {
3623 // Ensure the start lines up.
3624 assert(NewBeginOffset == BeginOffset);
3625
3626 // Rewrite the size as needed.
3627 if (NewEndOffset != EndOffset)
3628 II.setLength(NewEndOffset - NewBeginOffset);
3629 return false;
3630 }
3631 // Record this instruction for deletion.
3632 Pass.DeadInsts.push_back(&II);
3633
3634 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3635 // alloca that should be re-examined after rewriting this instruction.
3636 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
// NOTE(review): the initializer of AI (one line) is not present in this
// listing; confirm against the full source.
3637 if (AllocaInst *AI =
3639 assert(AI != &OldAI && AI != &NewAI &&
3640 "Splittable transfers cannot reach the same alloca on both ends.");
3641 Pass.Worklist.insert(AI);
3642 }
3643
3644 Type *OtherPtrTy = OtherPtr->getType();
3645 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3646
3647 // Compute the relative offset for the other pointer within the transfer.
3648 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3649 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
// The other side's alignment is its declared alignment (1 if none) reduced to
// what still holds at OtherOffset bytes past that pointer.
3650 Align OtherAlign =
3651 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3652 OtherAlign =
3653 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3654
3655 if (EmitMemCpy) {
3656 // Compute the other pointer, folding as much as possible to produce
3657 // a single, simple GEP in most cases.
3658 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3659 OtherPtr->getName() + ".");
3660
3661 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3662 Type *SizeTy = II.getLength()->getType();
3663 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3664
3665 Value *DestPtr, *SrcPtr;
3666 MaybeAlign DestAlign, SrcAlign;
3667 // Note: IsDest is true iff we're copying into the new alloca slice
3668 if (IsDest) {
3669 DestPtr = OurPtr;
3670 DestAlign = SliceAlign;
3671 SrcPtr = OtherPtr;
3672 SrcAlign = OtherAlign;
3673 } else {
3674 DestPtr = OtherPtr;
3675 DestAlign = OtherAlign;
3676 SrcPtr = OurPtr;
3677 SrcAlign = SliceAlign;
3678 }
3679 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3680 Size, II.isVolatile());
3681 if (AATags)
3682 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3683
3684 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3685 if (IsDest) {
3686 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3687 &II, New, DestPtr, nullptr, DL);
// NOTE(review): the first line of the argument to dyn_cast below is not
// present in this listing; confirm against the full source.
3688 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3690 DL, Offset, /*AllowNonInbounds*/ true))) {
3691 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3692 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3693 }
3694 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3695 return false;
3696 }
3697
// From here on the transfer is expressed as a load followed by a store.
3698 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3699 NewEndOffset == NewAllocaEndOffset;
3700 uint64_t Size = NewEndOffset - NewBeginOffset;
3701 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3702 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3703 unsigned NumElements = EndIndex - BeginIndex;
3704 IntegerType *SubIntTy =
3705 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3706
3707 // Reset the other pointer type to match the register type we're going to
3708 // use, but using the address space of the original other pointer.
3709 Type *OtherTy;
3710 if (VecTy && !IsWholeAlloca) {
3711 if (NumElements == 1)
3712 OtherTy = VecTy->getElementType();
3713 else
3714 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3715 } else if (IntTy && !IsWholeAlloca) {
3716 OtherTy = SubIntTy;
3717 } else {
3718 OtherTy = NewAllocaTy;
3719 }
3720
3721 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3722 OtherPtr->getName() + ".");
// Alignments are first assigned as if we were the destination, then swapped
// when the new alloca is actually the source.
3723 MaybeAlign SrcAlign = OtherAlign;
3724 MaybeAlign DstAlign = SliceAlign;
3725 if (!IsDest)
3726 std::swap(SrcAlign, DstAlign);
3727
3728 Value *SrcPtr;
3729 Value *DstPtr;
3730
3731 if (IsDest) {
3732 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3733 SrcPtr = AdjPtr;
3734 } else {
3735 DstPtr = AdjPtr;
3736 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3737 }
3738
// Produce the value to transfer. When reading part of a vector or integer
// alloca, load the whole alloca and extract the slice; otherwise load
// OtherTy directly from the source pointer.
3739 Value *Src;
3740 if (VecTy && !IsWholeAlloca && !IsDest) {
3741 Src =
3742 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3743 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3744 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3745 Src =
3746 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3747 Src = IRB.CreateBitPreservingCastChain(DL, Src, IntTy);
3748 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3749 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3750 } else {
3751 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3752 II.isVolatile(), "copyload");
3753 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3754 LLVMContext::MD_access_group});
3755 if (AATags)
3756 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3757 Load->getType(), DL));
3758 Src = Load;
3759 }
3760
// When writing part of a vector or integer alloca, merge the transferred
// value into the alloca's current contents so the store covers all of it.
3761 if (VecTy && !IsWholeAlloca && IsDest) {
3762 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3763 "oldload");
3764 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3765 } else if (IntTy && !IsWholeAlloca && IsDest) {
3766 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3767 "oldload");
3768 Old = IRB.CreateBitPreservingCastChain(DL, Old, IntTy);
3769 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3770 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3771 Src = IRB.CreateBitPreservingCastChain(DL, Src, NewAllocaTy);
3772 }
3773
3774 StoreInst *Store = cast<StoreInst>(
3775 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3776 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3777 LLVMContext::MD_access_group});
3778 if (AATags)
3779 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3780 Src->getType(), DL));
3781
3782 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3783 if (IsDest) {
3784
3785 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3786 Store, DstPtr, Src, DL);
// NOTE(review): the first line of the argument to dyn_cast below is not
// present in this listing; confirm against the full source.
3787 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3789 DL, Offset, /*AllowNonInbounds*/ true))) {
3790 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3791 &II, Store, DstPtr, Src, DL);
3792 }
3793
3794 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3795 return !II.isVolatile();
3796 }
3797
3798 bool visitIntrinsicInst(IntrinsicInst &II) {
3799 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3800 "Unexpected intrinsic!");
3801 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3802
3803 // Record this instruction for deletion.
3804 Pass.DeadInsts.push_back(&II);
3805
3806 if (II.isDroppable()) {
3807 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3808 // TODO For now we forget assumed information, this can be improved.
3809 OldPtr->dropDroppableUsesIn(II);
3810 return true;
3811 }
3812
3813 assert(II.getArgOperand(0) == OldPtr);
3814 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3815 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3816 Value *New;
3817 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3818 New = IRB.CreateLifetimeStart(Ptr);
3819 else
3820 New = IRB.CreateLifetimeEnd(Ptr);
3821
3822 (void)New;
3823 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3824
3825 return true;
3826 }
3827
3828 void fixLoadStoreAlign(Instruction &Root) {
3829 // This algorithm implements the same visitor loop as
3830 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3831 // or store found.
3832 SmallPtrSet<Instruction *, 4> Visited;
3833 SmallVector<Instruction *, 4> Uses;
3834 Visited.insert(&Root);
3835 Uses.push_back(&Root);
3836 do {
3837 Instruction *I = Uses.pop_back_val();
3838
3839 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3840 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3841 continue;
3842 }
3843 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3844 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3845 continue;
3846 }
3847
3851 for (User *U : I->users())
3852 if (Visited.insert(cast<Instruction>(U)).second)
3853 Uses.push_back(cast<Instruction>(U));
3854 } while (!Uses.empty());
3855 }
3856
3857 bool visitPHINode(PHINode &PN) {
3858 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3859 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3860 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3861
3862 // We would like to compute a new pointer in only one place, but have it be
3863 // as local as possible to the PHI. To do that, we re-use the location of
3864 // the old pointer, which necessarily must be in the right position to
3865 // dominate the PHI.
3866 IRBuilderBase::InsertPointGuard Guard(IRB);
3867 if (isa<PHINode>(OldPtr))
3868 IRB.SetInsertPoint(OldPtr->getParent(),
3869 OldPtr->getParent()->getFirstInsertionPt());
3870 else
3871 IRB.SetInsertPoint(OldPtr);
3872 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3873
3874 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3875 // Replace the operands which were using the old pointer.
3876 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3877
3878 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3879 deleteIfTriviallyDead(OldPtr);
3880
3881 // Fix the alignment of any loads or stores using this PHI node.
3882 fixLoadStoreAlign(PN);
3883
3884 // PHIs can't be promoted on their own, but often can be speculated. We
3885 // check the speculation outside of the rewriter so that we see the
3886 // fully-rewritten alloca.
3887 PHIUsers.insert(&PN);
3888 return true;
3889 }
3890
3891 bool visitSelectInst(SelectInst &SI) {
3892 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3893 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3894 "Pointer isn't an operand!");
3895 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3896 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3897
3898 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3899 // Replace the operands which were using the old pointer.
3900 if (SI.getOperand(1) == OldPtr)
3901 SI.setOperand(1, NewPtr);
3902 if (SI.getOperand(2) == OldPtr)
3903 SI.setOperand(2, NewPtr);
3904
3905 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
3906 deleteIfTriviallyDead(OldPtr);
3907
3908 // Fix the alignment of any loads or stores using this select.
3909 fixLoadStoreAlign(SI);
3910
3911 // Selects can't be promoted on their own, but often can be speculated. We
3912 // check the speculation outside of the rewriter so that we see the
3913 // fully-rewritten alloca.
3914 SelectUsers.insert(&SI);
3915 return true;
3916 }
3917};
3918
3919/// Visitor to rewrite aggregate loads and stores as scalar.
3920///
3921/// This pass aggressively rewrites all aggregate loads and stores on
3922/// a particular pointer (or any pointer derived from it which we can identify)
3923/// with scalar loads and stores.
3924class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3925 // Befriend the base class so it can delegate to private visit methods.
3926 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3927
3928 /// Queue of pointer uses to analyze and potentially rewrite.
3930
3931 /// Set to prevent us from cycling with phi nodes and loops.
3932 SmallPtrSet<User *, 8> Visited;
3933
3934 /// The current pointer use being rewritten. This is used to dig up the used
3935 /// value (as opposed to the user).
3936 Use *U = nullptr;
3937
3938 /// Used to calculate offsets, and hence alignment, of subobjects.
3939 const DataLayout &DL;
3940
3941 IRBuilderTy &IRB;
3942
3943public:
3944 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
3945 : DL(DL), IRB(IRB) {}
3946
3947 /// Rewrite loads and stores through a pointer and all pointers derived from
3948 /// it.
3949 bool rewrite(Instruction &I) {
3950 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3951 enqueueUsers(I);
3952 bool Changed = false;
3953 while (!Queue.empty()) {
3954 U = Queue.pop_back_val();
3955 Changed |= visit(cast<Instruction>(U->getUser()));
3956 }
3957 return Changed;
3958 }
3959
3960private:
3961 /// Enqueue all the users of the given instruction for further processing.
3962 /// This uses a set to de-duplicate users.
3963 void enqueueUsers(Instruction &I) {
3964 for (Use &U : I.uses())
3965 if (Visited.insert(U.getUser()).second)
3966 Queue.push_back(&U);
3967 }
3968
3969 // Conservative default is to not rewrite anything.
3970 bool visitInstruction(Instruction &I) { return false; }
3971
3972 /// Generic recursive split emission class.
3973 template <typename Derived> class OpSplitter {
3974 protected:
3975 /// The builder used to form new instructions.
3976 IRBuilderTy &IRB;
3977
3978 /// The indices which to be used with insert- or extractvalue to select the
3979 /// appropriate value within the aggregate.
3980 SmallVector<unsigned, 4> Indices;
3981
3982 /// The indices to a GEP instruction which will move Ptr to the correct slot
3983 /// within the aggregate.
3984 SmallVector<Value *, 4> GEPIndices;
3985
3986 /// The base pointer of the original op, used as a base for GEPing the
3987 /// split operations.
3988 Value *Ptr;
3989
3990 /// The base pointee type being GEPed into.
3991 Type *BaseTy;
3992
3993 /// Known alignment of the base pointer.
3994 Align BaseAlign;
3995
3996 /// To calculate offset of each component so we can correctly deduce
3997 /// alignments.
3998 const DataLayout &DL;
3999
4000 /// Initialize the splitter with an insertion point, Ptr and start with a
4001 /// single zero GEP index.
4002 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4003 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4004 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4005 BaseAlign(BaseAlign), DL(DL) {
4006 IRB.SetInsertPoint(InsertionPoint);
4007 }
4008
4009 public:
4010 /// Generic recursive split emission routine.
4011 ///
4012 /// This method recursively splits an aggregate op (load or store) into
4013 /// scalar or vector ops. It splits recursively until it hits a single value
4014 /// and emits that single value operation via the template argument.
4015 ///
4016 /// The logic of this routine relies on GEPs and insertvalue and
4017 /// extractvalue all operating with the same fundamental index list, merely
4018 /// formatted differently (GEPs need actual values).
4019 ///
4020 /// \param Ty The type being split recursively into smaller ops.
4021 /// \param Agg The aggregate value being built up or stored, depending on
4022 /// whether this is splitting a load or a store respectively.
4023 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4024 if (Ty->isSingleValueType()) {
4025 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4026 return static_cast<Derived *>(this)->emitFunc(
4027 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
4028 }
4029
4030 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
4031 unsigned OldSize = Indices.size();
4032 (void)OldSize;
4033 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4034 ++Idx) {
4035 assert(Indices.size() == OldSize && "Did not return to the old size");
4036 Indices.push_back(Idx);
4037 GEPIndices.push_back(IRB.getInt32(Idx));
4038 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4039 GEPIndices.pop_back();
4040 Indices.pop_back();
4041 }
4042 return;
4043 }
4044
4045 if (StructType *STy = dyn_cast<StructType>(Ty)) {
4046 unsigned OldSize = Indices.size();
4047 (void)OldSize;
4048 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4049 ++Idx) {
4050 assert(Indices.size() == OldSize && "Did not return to the old size");
4051 Indices.push_back(Idx);
4052 GEPIndices.push_back(IRB.getInt32(Idx));
4053 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4054 GEPIndices.pop_back();
4055 Indices.pop_back();
4056 }
4057 return;
4058 }
4059
4060 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4061 }
4062 };
4063
/// Splitter for aggregate loads: emits one load per single-value leaf and
/// rebuilds the aggregate with insertvalue. It also records the leaf loads
/// and any fake_use intrinsics on the original load, so that those can be
/// re-emitted per component.
4064 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
// Alias-analysis metadata of the original load; adjusted per leaf in
// emitFunc.
4065 AAMDNodes AATags;
4066 // A vector to hold the split components that we want to emit
4067 // separate fake uses for.
4068 SmallVector<Value *, 4> Components;
4069 // A vector to hold all the fake uses of the struct that we are splitting.
4070 // Usually there should only be one, but we are handling the general case.
// NOTE(review): the declaration of FakeUses (used by recordFakeUses and
// emitFakeUses below) is not present in this listing; confirm against the
// full source.
4072
4073 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4074 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4075 IRBuilderTy &IRB)
4076 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4077 IRB),
4078 AATags(AATags) {}
4079
4080 /// Emit a leaf load of a single value. This is called at the leaves of the
4081 /// recursive emission to actually load values.
///
/// \param Ty        Type of the leaf being loaded.
/// \param Agg       Aggregate under construction; updated in place with the
///                  loaded leaf inserted at the current index path.
/// \param Alignment Alignment to use for the leaf load.
/// \param Name      Name prefix for the GEP, load and insertvalue.
4082 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4084 // Load the single value and insert it using the indices.
4085 Value *GEP =
4086 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4087 LoadInst *Load =
4088 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4089
// AA metadata is only attached when the GEP indices fold to a constant
// offset, which is then used to adjust the tags for this leaf.
4090 APInt Offset(
4091 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4092 if (AATags &&
4093 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
4094 Load->setAAMetadata(
4095 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
4096 // Record the load so we can generate a fake use for this aggregate
4097 // component.
4098 Components.push_back(Load);
4099
4100 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4101 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
4102 }
4103
4104 // Stash the fake uses that use the value generated by this instruction.
4105 void recordFakeUses(LoadInst &LI) {
4106 for (Use &U : LI.uses())
4107 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
4108 if (II->getIntrinsicID() == Intrinsic::fake_use)
4109 FakeUses.push_back(II);
4110 }
4111
4112 // Replace all fake uses of the aggregate with a series of fake uses, one
4113 // for each split component.
// Each recorded fake use is erased after its replacements are inserted
// immediately before it.
4114 void emitFakeUses() {
4115 for (Instruction *I : FakeUses) {
4116 IRB.SetInsertPoint(I);
4117 for (auto *V : Components)
4118 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4119 I->eraseFromParent();
4120 }
4121 }
4122 };
4123
/// Split a simple load of a non-single-value (aggregate) type into per-leaf
/// loads and replace the original load with the rebuilt aggregate.
///
/// \returns false, leaving the load untouched, when it is not simple or
/// already loads a single value type; true after the load has been replaced
/// and erased.
4124 bool visitLoadInst(LoadInst &LI) {
4125 assert(LI.getPointerOperand() == *U);
4126 if (!LI.isSimple() || LI.getType()->isSingleValueType())
4127 return false;
4128
4129 // We have an aggregate being loaded, split it apart.
4130 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
4131 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
4132 getAdjustedAlignment(&LI, 0), DL, IRB);
// Fake uses must be collected before the load's uses are replaced below.
4133 Splitter.recordFakeUses(LI);
// NOTE(review): the declaration/initialization of V (the aggregate being
// rebuilt) is not present in this listing; confirm against the full source.
4135 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4136 Splitter.emitFakeUses();
// LI is erased below; presumably it is dropped from Visited so the set does
// not keep a pointer to a deleted instruction. TODO confirm.
4137 Visited.erase(&LI);
4138 LI.replaceAllUsesWith(V);
4139 LI.eraseFromParent();
4140 return true;
4141 }
4142
4143 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4144 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4145 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4146 const DataLayout &DL, IRBuilderTy &IRB)
4147 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4148 DL, IRB),
4149 AATags(AATags), AggStore(AggStore) {}
4150 AAMDNodes AATags;
4151 StoreInst *AggStore;
4152 /// Emit a leaf store of a single value. This is called at the leaves of the
4153 /// recursive emission to actually produce stores.
4154 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4156 // Extract the single value and store it using the indices.
4157 //
4158 // The gep and extractvalue values are factored out of the CreateStore
4159 // call to make the output independent of the argument evaluation order.
4160 Value *ExtractValue =
4161 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4162 Value *InBoundsGEP =
4163 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4164 StoreInst *Store =
4165 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4166
4167 APInt Offset(
4168 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4169 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
4170 if (AATags) {
4171 Store->setAAMetadata(AATags.adjustForAccess(
4172 Offset.getZExtValue(), ExtractValue->getType(), DL));
4173 }
4174
4175 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
4176 // If we cannot (because there's an intervening non-const or unbounded
4177 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
4178 // this instruction.
4180 if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
4181 uint64_t SizeInBits =
4182 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4183 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
4184 SizeInBits, AggStore, Store,
4185 Store->getPointerOperand(), Store->getValueOperand(),
4186 DL);
4187 } else {
4189 "AT: unexpected debug.assign linked to store through "
4190 "unbounded GEP");
4191 }
4192 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
4193 }
4194 };
4195
4196 bool visitStoreInst(StoreInst &SI) {
4197 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4198 return false;
4199 Value *V = SI.getValueOperand();
4200 if (V->getType()->isSingleValueType())
4201 return false;
4202
4203 // We have an aggregate being stored, split it apart.
4204 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4205 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4206 getAdjustedAlignment(&SI, 0), DL, IRB);
4207 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4208 Visited.erase(&SI);
4209 // The stores replacing SI each have markers describing fragments of the
4210 // assignment so delete the assignment markers linked to SI.
4212 SI.eraseFromParent();
4213 return true;
4214 }
4215
4216 bool visitBitCastInst(BitCastInst &BC) {
4217 enqueueUsers(BC);
4218 return false;
4219 }
4220
4221 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4222 enqueueUsers(ASC);
4223 return false;
4224 }
4225
4226 // Unfold gep (select cond, ptr1, ptr2), idx
4227 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
4228 // and gep ptr, (select cond, idx1, idx2)
4229 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
4230 // We also allow for i1 zext indices, which are equivalent to selects.
4231 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4232 // Check whether the GEP has exactly one select operand and all indices
4233 // will become constant after the transform.
4235 for (Value *Op : GEPI.indices()) {
4236 if (auto *SI = dyn_cast<SelectInst>(Op)) {
4237 if (Sel)
4238 return false;
4239
4240 Sel = SI;
4241 if (!isa<ConstantInt>(SI->getTrueValue()) ||
4242 !isa<ConstantInt>(SI->getFalseValue()))
4243 return false;
4244 continue;
4245 }
4246 if (auto *ZI = dyn_cast<ZExtInst>(Op)) {
4247 if (Sel)
4248 return false;
4249 Sel = ZI;
4250 if (!ZI->getSrcTy()->isIntegerTy(1))
4251 return false;
4252 continue;
4253 }
4254
4255 if (!isa<ConstantInt>(Op))
4256 return false;
4257 }
4258
4259 if (!Sel)
4260 return false;
4261
4262 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4263 dbgs() << " original: " << *Sel << "\n";
4264 dbgs() << " " << GEPI << "\n";);
4265
4266 auto GetNewOps = [&](Value *SelOp) {
4267 SmallVector<Value *> NewOps;
4268 for (Value *Op : GEPI.operands())
4269 if (Op == Sel)
4270 NewOps.push_back(SelOp);
4271 else
4272 NewOps.push_back(Op);
4273 return NewOps;
4274 };
4275
4276 Value *Cond, *True, *False;
4277 Instruction *MDFrom = nullptr;
4278 if (auto *SI = dyn_cast<SelectInst>(Sel)) {
4279 Cond = SI->getCondition();
4280 True = SI->getTrueValue();
4281 False = SI->getFalseValue();
4283 MDFrom = SI;
4284 } else {
4285 Cond = Sel->getOperand(0);
4286 True = ConstantInt::get(Sel->getType(), 1);
4287 False = ConstantInt::get(Sel->getType(), 0);
4288 }
4289 SmallVector<Value *> TrueOps = GetNewOps(True);
4290 SmallVector<Value *> FalseOps = GetNewOps(False);
4291
4292 IRB.SetInsertPoint(&GEPI);
4293 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4294
4295 Type *Ty = GEPI.getSourceElementType();
4296 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4297 True->getName() + ".sroa.gep", NW);
4298
4299 Value *NFalse =
4300 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4301 False->getName() + ".sroa.gep", NW);
4302
4303 Value *NSel = MDFrom
4304 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4305 Sel->getName() + ".sroa.sel", MDFrom)
4306 : IRB.CreateSelectWithUnknownProfile(
4307 Cond, NTrue, NFalse, DEBUG_TYPE,
4308 Sel->getName() + ".sroa.sel");
4309 Visited.erase(&GEPI);
4310 GEPI.replaceAllUsesWith(NSel);
4311 GEPI.eraseFromParent();
4312 Instruction *NSelI = cast<Instruction>(NSel);
4313 Visited.insert(NSelI);
4314 enqueueUsers(*NSelI);
4315
4316 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4317 dbgs() << " " << *NFalse << "\n";
4318 dbgs() << " " << *NSel << "\n";);
4319
4320 return true;
4321 }
4322
4323 // Unfold gep (phi ptr1, ptr2), idx
4324 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4325 // and gep ptr, (phi idx1, idx2)
4326 // => phi ((gep ptr, idx1), (gep ptr, idx2))
4327 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4328 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4329 // operand (looking through the phi if it is the phi we want to unfold) is
4330 // an instruction besides a static alloca.
4331 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4332 auto IsInvalidPointerOperand = [](Value *V) {
4333 if (!isa<Instruction>(V))
4334 return false;
4335 if (auto *AI = dyn_cast<AllocaInst>(V))
4336 return !AI->isStaticAlloca();
4337 return true;
4338 };
4339 if (Phi) {
4340 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4341 return false;
4342 } else {
4343 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4344 return false;
4345 }
4346 // Check whether the GEP has exactly one phi operand (including the pointer
4347 // operand) and all indices will become constant after the transform.
4348 for (Value *Op : GEPI.indices()) {
4349 if (auto *SI = dyn_cast<PHINode>(Op)) {
4350 if (Phi)
4351 return false;
4352
4353 Phi = SI;
4354 if (!all_of(Phi->incoming_values(),
4355 [](Value *V) { return isa<ConstantInt>(V); }))
4356 return false;
4357 continue;
4358 }
4359
4360 if (!isa<ConstantInt>(Op))
4361 return false;
4362 }
4363
4364 if (!Phi)
4365 return false;
4366
4367 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4368 dbgs() << " original: " << *Phi << "\n";
4369 dbgs() << " " << GEPI << "\n";);
4370
4371 auto GetNewOps = [&](Value *PhiOp) {
4372 SmallVector<Value *> NewOps;
4373 for (Value *Op : GEPI.operands())
4374 if (Op == Phi)
4375 NewOps.push_back(PhiOp);
4376 else
4377 NewOps.push_back(Op);
4378 return NewOps;
4379 };
4380
4381 IRB.SetInsertPoint(Phi);
4382 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4383 Phi->getName() + ".sroa.phi");
4384
4385 Type *SourceTy = GEPI.getSourceElementType();
4386 // We only handle arguments, constants, and static allocas here, so we can
4387 // insert GEPs at the end of the entry block.
4388 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4389 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4390 Value *Op = Phi->getIncomingValue(I);
4391 BasicBlock *BB = Phi->getIncomingBlock(I);
4392 Value *NewGEP;
4393 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4394 NewGEP = NewPhi->getIncomingValue(NI);
4395 } else {
4396 SmallVector<Value *> NewOps = GetNewOps(Op);
4397 NewGEP =
4398 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4399 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4400 }
4401 NewPhi->addIncoming(NewGEP, BB);
4402 }
4403
4404 Visited.erase(&GEPI);
4405 GEPI.replaceAllUsesWith(NewPhi);
4406 GEPI.eraseFromParent();
4407 Visited.insert(NewPhi);
4408 enqueueUsers(*NewPhi);
4409
4410 LLVM_DEBUG(dbgs() << " to: ";
4411 for (Value *In
4412 : NewPhi->incoming_values()) dbgs()
4413 << "\n " << *In;
4414 dbgs() << "\n " << *NewPhi << '\n');
4415
4416 return true;
4417 }
4418
4419 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4420 if (unfoldGEPSelect(GEPI))
4421 return true;
4422
4423 if (unfoldGEPPhi(GEPI))
4424 return true;
4425
4426 enqueueUsers(GEPI);
4427 return false;
4428 }
4429
4430 bool visitPHINode(PHINode &PN) {
4431 enqueueUsers(PN);
4432 return false;
4433 }
4434
4435 bool visitSelectInst(SelectInst &SI) {
4436 enqueueUsers(SI);
4437 return false;
4438 }
4439};
4440
4441} // end anonymous namespace
4442
4443/// Strip aggregate type wrapping.
4444///
4445/// This removes no-op aggregate types wrapping an underlying type. It will
4446/// strip as many layers of types as it can without changing either the type
4447/// size or the allocated size.
4449 if (Ty->isSingleValueType())
4450 return Ty;
4451
4452 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4453 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4454
4455 Type *InnerTy;
4456 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4457 InnerTy = ArrTy->getElementType();
4458 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4459 const StructLayout *SL = DL.getStructLayout(STy);
4460 unsigned Index = SL->getElementContainingOffset(0);
4461 InnerTy = STy->getElementType(Index);
4462 } else {
4463 return Ty;
4464 }
4465
4466 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4467 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4468 return Ty;
4469
4470 return stripAggregateTypeWrapping(DL, InnerTy);
4471}
4472
4473/// Try to find a partition of the aggregate type passed in for a given
4474/// offset and size.
4475///
4476/// This recurses through the aggregate type and tries to compute a subtype
4477/// based on the offset and size. When the offset and size span a sub-section
4478/// of an array, it will even compute a new array type for that sub-section,
4479/// and the same for structs.
4480///
4481/// Note that this routine is very strict and tries to find a partition of the
4482/// type which produces the *exact* right offset and size. It is not forgiving
4483/// when the size or offset cause either end of type-based partition to be off.
4484/// Also, this is a best-effort routine. It is reasonable to give up and not
4485/// return a type if necessary.
4487 uint64_t Size) {
4488 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4489 return stripAggregateTypeWrapping(DL, Ty);
4490 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4491 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4492 return nullptr;
4493
4494 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4495 Type *ElementTy;
4496 uint64_t TyNumElements;
4497 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4498 ElementTy = AT->getElementType();
4499 TyNumElements = AT->getNumElements();
4500 } else {
4501 // FIXME: This isn't right for vectors with non-byte-sized or
4502 // non-power-of-two sized elements.
4503 auto *VT = cast<FixedVectorType>(Ty);
4504 ElementTy = VT->getElementType();
4505 TyNumElements = VT->getNumElements();
4506 }
4507 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4508 uint64_t NumSkippedElements = Offset / ElementSize;
4509 if (NumSkippedElements >= TyNumElements)
4510 return nullptr;
4511 Offset -= NumSkippedElements * ElementSize;
4512
4513 // First check if we need to recurse.
4514 if (Offset > 0 || Size < ElementSize) {
4515 // Bail if the partition ends in a different array element.
4516 if ((Offset + Size) > ElementSize)
4517 return nullptr;
4518 // Recurse through the element type trying to peel off offset bytes.
4519 return getTypePartition(DL, ElementTy, Offset, Size);
4520 }
4521 assert(Offset == 0);
4522
4523 if (Size == ElementSize)
4524 return stripAggregateTypeWrapping(DL, ElementTy);
4525 assert(Size > ElementSize);
4526 uint64_t NumElements = Size / ElementSize;
4527 if (NumElements * ElementSize != Size)
4528 return nullptr;
4529 return ArrayType::get(ElementTy, NumElements);
4530 }
4531
4533 if (!STy)
4534 return nullptr;
4535
4536 const StructLayout *SL = DL.getStructLayout(STy);
4537
4538 if (SL->getSizeInBits().isScalable())
4539 return nullptr;
4540
4541 if (Offset >= SL->getSizeInBytes())
4542 return nullptr;
4543 uint64_t EndOffset = Offset + Size;
4544 if (EndOffset > SL->getSizeInBytes())
4545 return nullptr;
4546
4547 unsigned Index = SL->getElementContainingOffset(Offset);
4548 Offset -= SL->getElementOffset(Index);
4549
4550 Type *ElementTy = STy->getElementType(Index);
4551 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4552 if (Offset >= ElementSize)
4553 return nullptr; // The offset points into alignment padding.
4554
4555 // See if any partition must be contained by the element.
4556 if (Offset > 0 || Size < ElementSize) {
4557 if ((Offset + Size) > ElementSize)
4558 return nullptr;
4559 return getTypePartition(DL, ElementTy, Offset, Size);
4560 }
4561 assert(Offset == 0);
4562
4563 if (Size == ElementSize)
4564 return stripAggregateTypeWrapping(DL, ElementTy);
4565
4566 StructType::element_iterator EI = STy->element_begin() + Index,
4567 EE = STy->element_end();
4568 if (EndOffset < SL->getSizeInBytes()) {
4569 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4570 if (Index == EndIndex)
4571 return nullptr; // Within a single element and its padding.
4572
4573 // Don't try to form "natural" types if the elements don't line up with the
4574 // expected size.
4575 // FIXME: We could potentially recurse down through the last element in the
4576 // sub-struct to find a natural end point.
4577 if (SL->getElementOffset(EndIndex) != EndOffset)
4578 return nullptr;
4579
4580 assert(Index < EndIndex);
4581 EE = STy->element_begin() + EndIndex;
4582 }
4583
4584 // Try to build up a sub-structure.
4585 StructType *SubTy =
4586 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4587 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4588 if (Size != SubSL->getSizeInBytes())
4589 return nullptr; // The sub-struct doesn't have quite the size needed.
4590
4591 return SubTy;
4592}
4593
4594/// Pre-split loads and stores to simplify rewriting.
4595///
4596/// We want to break up the splittable load+store pairs as much as
4597/// possible. This is important to do as a preprocessing step, as once we
4598/// start rewriting the accesses to partitions of the alloca we lose the
4599/// necessary information to correctly split apart paired loads and stores
4600/// which both point into this alloca. The case to consider is something like
4601/// the following:
4602///
4603/// %a = alloca [12 x i8]
4604/// %gep1 = getelementptr i8, ptr %a, i32 0
4605/// %gep2 = getelementptr i8, ptr %a, i32 4
4606/// %gep3 = getelementptr i8, ptr %a, i32 8
4607/// store float 0.0, ptr %gep1
4608/// store float 1.0, ptr %gep2
4609/// %v = load i64, ptr %gep1
4610/// store i64 %v, ptr %gep2
4611/// %f1 = load float, ptr %gep2
4612/// %f2 = load float, ptr %gep3
4613///
4614/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4615/// promote everything so we recover the 2 SSA values that should have been
4616/// there all along.
4617///
4618/// \returns true if any changes are made.
4619bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4620 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4621
4622 // Track the loads and stores which are candidates for pre-splitting here, in
4623 // the order they first appear during the partition scan. These give stable
4624 // iteration order and a basis for tracking which loads and stores we
4625 // actually split.
4628
4629 // We need to accumulate the splits required of each load or store where we
4630 // can find them via a direct lookup. This is important to cross-check loads
4631 // and stores against each other. We also track the slice so that we can kill
4632 // all the slices that end up split.
4633 struct SplitOffsets {
4634 Slice *S;
4635 std::vector<uint64_t> Splits;
4636 };
4637 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4638
4639 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4640 // This is important as we also cannot pre-split stores of those loads!
4641 // FIXME: This is all pretty gross. It means that we can be more aggressive
4642 // in pre-splitting when the load feeding the store happens to come from
4643 // a separate alloca. Put another way, the effectiveness of SROA would be
4644 // decreased by a frontend which just concatenated all of its local allocas
4645 // into one big flat alloca. But defeating such patterns is exactly the job
4646 // SROA is tasked with! Sadly, to not have this discrepancy we would have
4647 // to change store pre-splitting to actually force pre-splitting of the load
4648 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4649 // maybe it would make it more principled?
4650 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4651
4652 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4653 for (auto &P : AS.partitions()) {
4654 for (Slice &S : P) {
4655 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4656 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4657 // If this is a load we have to track that it can't participate in any
4658 // pre-splitting. If this is a store of a load we have to track that
4659 // that load also can't participate in any pre-splitting.
4660 if (auto *LI = dyn_cast<LoadInst>(I))
4661 UnsplittableLoads.insert(LI);
4662 else if (auto *SI = dyn_cast<StoreInst>(I))
4663 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4664 UnsplittableLoads.insert(LI);
4665 continue;
4666 }
4667 assert(P.endOffset() > S.beginOffset() &&
4668 "Empty or backwards partition!");
4669
4670 // Determine if this is a pre-splittable slice.
4671 if (auto *LI = dyn_cast<LoadInst>(I)) {
4672 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4673
4674 // The load must be used exclusively to store into other pointers for
4675 // us to be able to arbitrarily pre-split it. The stores must also be
4676 // simple to avoid changing semantics.
4677 auto IsLoadSimplyStored = [](LoadInst *LI) {
4678 for (User *LU : LI->users()) {
4679 auto *SI = dyn_cast<StoreInst>(LU);
4680 if (!SI || !SI->isSimple())
4681 return false;
4682 }
4683 return true;
4684 };
4685 if (!IsLoadSimplyStored(LI)) {
4686 UnsplittableLoads.insert(LI);
4687 continue;
4688 }
4689
4690 Loads.push_back(LI);
4691 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4692 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4693 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4694 continue;
4695 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4696 if (!StoredLoad || !StoredLoad->isSimple())
4697 continue;
4698 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4699
4700 Stores.push_back(SI);
4701 } else {
4702 // Other uses cannot be pre-split.
4703 continue;
4704 }
4705
4706 // Record the initial split.
4707 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4708 auto &Offsets = SplitOffsetsMap[I];
4709 assert(Offsets.Splits.empty() &&
4710 "Should not have splits the first time we see an instruction!");
4711 Offsets.S = &S;
4712 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4713 }
4714
4715 // Now scan the already split slices, and add a split for any of them which
4716 // we're going to pre-split.
4717 for (Slice *S : P.splitSliceTails()) {
4718 auto SplitOffsetsMapI =
4719 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4720 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4721 continue;
4722 auto &Offsets = SplitOffsetsMapI->second;
4723
4724 assert(Offsets.S == S && "Found a mismatched slice!");
4725 assert(!Offsets.Splits.empty() &&
4726 "Cannot have an empty set of splits on the second partition!");
4727 assert(Offsets.Splits.back() ==
4728 P.beginOffset() - Offsets.S->beginOffset() &&
4729 "Previous split does not end where this one begins!");
4730
4731 // Record each split. The last partition's end isn't needed as the size
4732 // of the slice dictates that.
4733 if (S->endOffset() > P.endOffset())
4734 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4735 }
4736 }
4737
4738 // We may have split loads where some of their stores are split stores. For
4739 // such loads and stores, we can only pre-split them if their splits exactly
4740 // match relative to their starting offset. We have to verify this prior to
4741 // any rewriting.
4742 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4743 // Lookup the load we are storing in our map of split
4744 // offsets.
4745 auto *LI = cast<LoadInst>(SI->getValueOperand());
4746 // If it was completely unsplittable, then we're done,
4747 // and this store can't be pre-split.
4748 if (UnsplittableLoads.count(LI))
4749 return true;
4750
4751 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4752 if (LoadOffsetsI == SplitOffsetsMap.end())
4753 return false; // Unrelated loads are definitely safe.
4754 auto &LoadOffsets = LoadOffsetsI->second;
4755
4756 // Now lookup the store's offsets.
4757 auto &StoreOffsets = SplitOffsetsMap[SI];
4758
4759 // If the relative offsets of each split in the load and
4760 // store match exactly, then we can split them and we
4761 // don't need to remove them here.
4762 if (LoadOffsets.Splits == StoreOffsets.Splits)
4763 return false;
4764
4765 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4766 << " " << *LI << "\n"
4767 << " " << *SI << "\n");
4768
4769 // We've found a store and load that we need to split
4770 // with mismatched relative splits. Just give up on them
4771 // and remove both instructions from our list of
4772 // candidates.
4773 UnsplittableLoads.insert(LI);
4774 return true;
4775 });
4776 // Now we have to go *back* through all the stores, because a later store may
4777 // have caused an earlier store's load to become unsplittable and if it is
4778 // unsplittable for the later store, then we can't rely on it being split in
4779 // the earlier store either.
4780 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4781 auto *LI = cast<LoadInst>(SI->getValueOperand());
4782 return UnsplittableLoads.count(LI);
4783 });
4784 // Once we've established all the loads that can't be split for some reason,
4785 // filter any that made it into our list out.
4786 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4787 return UnsplittableLoads.count(LI);
4788 });
4789
4790 // If no loads or stores are left, there is no pre-splitting to be done for
4791 // this alloca.
4792 if (Loads.empty() && Stores.empty())
4793 return false;
4794
4795 // From here on, we can't fail and will be building new accesses, so rig up
4796 // an IR builder.
4797 IRBuilderTy IRB(&AI);
4798
4799 // Collect the new slices which we will merge into the alloca slices.
4800 SmallVector<Slice, 4> NewSlices;
4801
4802 // Track any allocas we end up splitting loads and stores for so we iterate
4803 // on them.
4804 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4805
4806 // At this point, we have collected all of the loads and stores we can
4807 // pre-split, and the specific splits needed for them. We actually do the
4808 // splitting in a specific order in order to handle when one of the loads is
4809 // the value operand to one of the stores.
4810 //
4811 // First, we rewrite all of the split loads, and just accumulate each split
4812 // load in a parallel structure. We also build the slices for them and append
4813 // them to the alloca slices.
4814 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4815 std::vector<LoadInst *> SplitLoads;
4816 const DataLayout &DL = AI.getDataLayout();
4817 for (LoadInst *LI : Loads) {
4818 SplitLoads.clear();
4819
4820 auto &Offsets = SplitOffsetsMap[LI];
4821 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4822 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4823 "Load must have type size equal to store size");
4824 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4825 "Load must be >= slice size");
4826
4827 uint64_t BaseOffset = Offsets.S->beginOffset();
4828 assert(BaseOffset + SliceSize > BaseOffset &&
4829 "Cannot represent alloca access size using 64-bit integers!");
4830
4832 IRB.SetInsertPoint(LI);
4833
4834 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4835
4836 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4837 int Idx = 0, Size = Offsets.Splits.size();
4838 for (;;) {
4839 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4840 auto AS = LI->getPointerAddressSpace();
4841 auto *PartPtrTy = LI->getPointerOperandType();
4842 LoadInst *PLoad = IRB.CreateAlignedLoad(
4843 PartTy,
4844 getAdjustedPtr(IRB, DL, BasePtr,
4845 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4846 PartPtrTy, BasePtr->getName() + "."),
4847 getAdjustedAlignment(LI, PartOffset),
4848 /*IsVolatile*/ false, LI->getName());
4849 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4850 LLVMContext::MD_access_group});
4851
4852 // Append this load onto the list of split loads so we can find it later
4853 // to rewrite the stores.
4854 SplitLoads.push_back(PLoad);
4855
4856 // Now build a new slice for the alloca.
4857 NewSlices.push_back(
4858 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4859 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4860 /*IsSplittable*/ false));
4861 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4862 << ", " << NewSlices.back().endOffset()
4863 << "): " << *PLoad << "\n");
4864
4865 // See if we've handled all the splits.
4866 if (Idx >= Size)
4867 break;
4868
4869 // Setup the next partition.
4870 PartOffset = Offsets.Splits[Idx];
4871 ++Idx;
4872 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4873 }
4874
4875 // Now that we have the split loads, do the slow walk over all uses of the
4876 // load and rewrite them as split stores, or save the split loads to use
4877 // below if the store is going to be split there anyways.
4878 bool DeferredStores = false;
4879 for (User *LU : LI->users()) {
4880 StoreInst *SI = cast<StoreInst>(LU);
4881 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4882 DeferredStores = true;
4883 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4884 << "\n");
4885 continue;
4886 }
4887
4888 Value *StoreBasePtr = SI->getPointerOperand();
4889 IRB.SetInsertPoint(SI);
4890 AAMDNodes AATags = SI->getAAMetadata();
4891
4892 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4893
4894 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4895 LoadInst *PLoad = SplitLoads[Idx];
4896 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4897 auto *PartPtrTy = SI->getPointerOperandType();
4898
4899 auto AS = SI->getPointerAddressSpace();
4900 StoreInst *PStore = IRB.CreateAlignedStore(
4901 PLoad,
4902 getAdjustedPtr(IRB, DL, StoreBasePtr,
4903 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4904 PartPtrTy, StoreBasePtr->getName() + "."),
4905 getAdjustedAlignment(SI, PartOffset),
4906 /*IsVolatile*/ false);
4907 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4908 LLVMContext::MD_access_group,
4909 LLVMContext::MD_DIAssignID});
4910
4911 if (AATags)
4912 PStore->setAAMetadata(
4913 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
4914 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4915 }
4916
4917 // We want to immediately iterate on any allocas impacted by splitting
4918 // this store, and we have to track any promotable alloca (indicated by
4919 // a direct store) as needing to be resplit because it is no longer
4920 // promotable.
4921 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4922 ResplitPromotableAllocas.insert(OtherAI);
4923 Worklist.insert(OtherAI);
4924 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4925 StoreBasePtr->stripInBoundsOffsets())) {
4926 Worklist.insert(OtherAI);
4927 }
4928
4929 // Mark the original store as dead.
4930 DeadInsts.push_back(SI);
4931 }
4932
4933 // Save the split loads if there are deferred stores among the users.
4934 if (DeferredStores)
4935 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
4936
4937 // Mark the original load as dead and kill the original slice.
4938 DeadInsts.push_back(LI);
4939 Offsets.S->kill();
4940 }
4941
4942 // Second, we rewrite all of the split stores. At this point, we know that
4943 // all loads from this alloca have been split already. For stores of such
4944 // loads, we can simply look up the pre-existing split loads. For stores of
4945 // other loads, we split those loads first and then write split stores of
4946 // them.
4947 for (StoreInst *SI : Stores) {
4948 auto *LI = cast<LoadInst>(SI->getValueOperand());
4949 IntegerType *Ty = cast<IntegerType>(LI->getType());
4950 assert(Ty->getBitWidth() % 8 == 0);
4951 uint64_t StoreSize = Ty->getBitWidth() / 8;
4952 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
4953
4954 auto &Offsets = SplitOffsetsMap[SI];
4955 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
4956 "Slice size should always match load size exactly!");
4957 uint64_t BaseOffset = Offsets.S->beginOffset();
4958 assert(BaseOffset + StoreSize > BaseOffset &&
4959 "Cannot represent alloca access size using 64-bit integers!");
4960
4961 Value *LoadBasePtr = LI->getPointerOperand();
4962 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
4963
4964 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
4965
4966 // Check whether we have an already split load.
4967 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
4968 std::vector<LoadInst *> *SplitLoads = nullptr;
4969 if (SplitLoadsMapI != SplitLoadsMap.end()) {
4970 SplitLoads = &SplitLoadsMapI->second;
4971 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
4972 "Too few split loads for the number of splits in the store!");
4973 } else {
4974 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
4975 }
4976
4977 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4978 int Idx = 0, Size = Offsets.Splits.size();
4979 for (;;) {
4980 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
4981 auto *LoadPartPtrTy = LI->getPointerOperandType();
4982 auto *StorePartPtrTy = SI->getPointerOperandType();
4983
4984 // Either lookup a split load or create one.
4985 LoadInst *PLoad;
4986 if (SplitLoads) {
4987 PLoad = (*SplitLoads)[Idx];
4988 } else {
4989 IRB.SetInsertPoint(LI);
4990 auto AS = LI->getPointerAddressSpace();
4991 PLoad = IRB.CreateAlignedLoad(
4992 PartTy,
4993 getAdjustedPtr(IRB, DL, LoadBasePtr,
4994 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4995 LoadPartPtrTy, LoadBasePtr->getName() + "."),
4996 getAdjustedAlignment(LI, PartOffset),
4997 /*IsVolatile*/ false, LI->getName());
4998 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4999 LLVMContext::MD_access_group});
5000 }
5001
5002 // And store this partition.
5003 IRB.SetInsertPoint(SI);
5004 auto AS = SI->getPointerAddressSpace();
5005 StoreInst *PStore = IRB.CreateAlignedStore(
5006 PLoad,
5007 getAdjustedPtr(IRB, DL, StoreBasePtr,
5008 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5009 StorePartPtrTy, StoreBasePtr->getName() + "."),
5010 getAdjustedAlignment(SI, PartOffset),
5011 /*IsVolatile*/ false);
5012 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5013 LLVMContext::MD_access_group});
5014
5015 // Now build a new slice for the alloca.
5016 NewSlices.push_back(
5017 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5018 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
5019 /*IsSplittable*/ false));
5020 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5021 << ", " << NewSlices.back().endOffset()
5022 << "): " << *PStore << "\n");
5023 if (!SplitLoads) {
5024 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5025 }
5026
5027 // See if we've finished all the splits.
5028 if (Idx >= Size)
5029 break;
5030
5031 // Setup the next partition.
5032 PartOffset = Offsets.Splits[Idx];
5033 ++Idx;
5034 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5035 }
5036
5037 // We want to immediately iterate on any allocas impacted by splitting
5038 // this load, which is only relevant if it isn't a load of this alloca and
5039 // thus we didn't already split the loads above. We also have to keep track
5040 // of any promotable allocas we split loads on as they can no longer be
5041 // promoted.
5042 if (!SplitLoads) {
5043 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
5044 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5045 ResplitPromotableAllocas.insert(OtherAI);
5046 Worklist.insert(OtherAI);
5047 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5048 LoadBasePtr->stripInBoundsOffsets())) {
5049 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5050 Worklist.insert(OtherAI);
5051 }
5052 }
5053
5054 // Mark the original store as dead now that we've split it up and kill its
5055 // slice. Note that we leave the original load in place unless this store
5056 // was its only use. It may in turn be split up if it is an alloca load
5057 // for some other alloca, but it may be a normal load. This may introduce
5058 // redundant loads, but where those can be merged the rest of the optimizer
5059 // should handle the merging, and this uncovers SSA splits which is more
5060 // important. In practice, the original loads will almost always be fully
5061 // split and removed eventually, and the splits will be merged by any
5062 // trivial CSE, including instcombine.
5063 if (LI->hasOneUse()) {
5064 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5065 DeadInsts.push_back(LI);
5066 }
5067 DeadInsts.push_back(SI);
5068 Offsets.S->kill();
5069 }
5070
5071 // Remove the killed slices that have been pre-split.
5072 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5073
5074 // Insert our new slices. This will sort and merge them into the sorted
5075 // sequence.
5076 AS.insert(NewSlices);
5077
5078 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
5079#ifndef NDEBUG
5080 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5081 LLVM_DEBUG(AS.print(dbgs(), I, " "));
5082#endif
5083
5084 // Finally, don't try to promote any allocas that now require re-splitting.
5085 // They have already been added to the worklist above.
5086 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5087
5088 return true;
5089}
5090
5091/// Try to canonicalize a homogeneous struct partition to a vector type.
5092///
5093/// We can do this if all the elements of the struct are the same and tightly
5094/// packed. This can sometimes eliminate allocas because structs cannot get
5095/// promoted to LLVM values, but vectors can.
5096///
5097/// We only apply this transformation when all users of the alloca are memory
5098/// intrinsics. Otherwise, if there is a load or store of some other type to the
5099/// partition, SROA would select that type.
5100///
5101/// Applying this transformation too early may hinder memcpyopt, which may
5102/// generate better code when eliminating allocas. For example, see
5103/// `struct-to-vector-fp-store-only-tail.ll`, which demonstrates that applying
5104/// this before memcpyopt can initialize previously uninitialized memory when
5105/// the alloca gets promoted to an SSA value. For another example, see
5106/// `struct-to-vector-before-memcpyopt.ll`, which demonstrates that applying
5107/// this before memcpyopt can result in promoting an alloca so that we load a
5108/// temporary value instead of copying the temporary value into memory, whereas
5109/// memcpyopt eliminates the temporary altogether.
5110///
5111/// As such, we only apply this transformation after memcpyopt has run. We gate
5112/// this transformation by the "AggregateToVector" pass option.
5114 Partition &P,
5115 const DataLayout &DL) {
5116 unsigned NumElts = STy->getNumElements();
5117
5118 Type *EltTy = STy->getElementType(0);
5119 if (!llvm::all_equal(STy->elements()))
5120 return nullptr;
5121
5122 bool IsIntegralPointerTy =
5123 EltTy->isPointerTy() && !DL.isNonIntegralPointerType(EltTy);
5124 if (!EltTy->isIntegerTy() && !EltTy->isFloatingPointTy() &&
5125 !IsIntegralPointerTy)
5126 return nullptr;
5127
5128 auto *VTy = FixedVectorType::get(EltTy, NumElts);
5129 TypeSize StructSize = DL.getStructLayout(STy)->getSizeInBytes();
5130 TypeSize VectorSize = DL.getTypeAllocSize(VTy);
5131 if (StructSize != VectorSize)
5132 return nullptr;
5133
5134 for (const Slice &S : P) {
5135 if (S.isDead())
5136 continue;
5137 auto *U = S.getUse();
5138 if (!U)
5139 continue;
5140
5141 User *Usr = U->getUser();
5143 continue;
5144
5145 if (!isa<MemIntrinsic>(Usr))
5146 return nullptr;
5147 }
5148
5149 return VTy;
5150}
5151
5152/// Select a partition type for an alloca partition.
5153///
5154/// Try to compute a friendly type for this partition of the alloca. This
5155/// won't always succeed, in which case we fall back to a legal integer type
5156/// or an i8 array of an appropriate size.
5157///
5158/// \returns A tuple with the following elements:
5159/// - PartitionType: The computed type for this partition.
5160/// - IsIntegerWideningViable: True if integer widening promotion is used.
5161/// - VectorType: The vector type if vector promotion is used, otherwise
5162/// nullptr.
5163static std::tuple<Type *, bool, VectorType *>
5165 LLVMContext &C, bool AggregateToVector) {
5166 auto LogSelection = [&](StringRef Path, Type *SelectedTy,
5167 VectorType *SelectedVecTy, bool SelectedIntWidening) {
5168 LLVM_DEBUG({
5169 dbgs() << "selectPartitionType path=" << Path
5170 << " func=" << AI.getFunction()->getName() << " alloca=";
5171 if (AI.hasName())
5172 dbgs() << AI.getName();
5173 else
5174 dbgs() << "<unnamed>";
5175 dbgs() << " partition=[" << P.beginOffset() << "," << P.endOffset()
5176 << ") size=" << P.size();
5177 if (std::optional<TypeSize> AllocSize = AI.getAllocationSize(DL))
5178 dbgs() << " alloc-size=" << AllocSize->getKnownMinValue();
5179 if (SelectedTy)
5180 dbgs() << " chosen=" << *SelectedTy;
5181 if (SelectedVecTy)
5182 dbgs() << " vec=" << *SelectedVecTy;
5183 dbgs() << " intwiden=" << SelectedIntWidening << "\n";
5184 });
5185 };
5186 // First check if the partition is viable for vector promotion.
5187 //
5188 // We prefer vector promotion over integer widening promotion when:
5189 // - The vector element type is a floating-point type.
5190 // - All the loads/stores to the alloca are vector loads/stores to the
5191 // entire alloca or load/store a single element of the vector.
5192 //
5193 // Otherwise when there is an integer vector with mixed type loads/stores we
5194 // prefer integer widening promotion because it's more likely the user is
5195 // doing bitwise arithmetic and we generate better code.
5196 VectorType *VecTy =
5198 // If the vector element type is a floating-point type, we prefer vector
5199 // promotion. If the vector has one element, let the below code select
5200 // whether we promote with the vector or scalar.
5201 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5202 VecTy->getElementCount().getFixedValue() > 1) {
5203 LogSelection("direct-fp-vecty", VecTy, VecTy, false);
5204 return {VecTy, false, VecTy};
5205 }
5206
5207 // Check if there is a common type that all slices of the partition use that
5208 // spans the partition.
5209 auto [CommonUseTy, LargestIntTy] =
5210 findCommonType(P.begin(), P.end(), P.endOffset());
5211 if (CommonUseTy) {
5212 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
5213 if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
5214 // We prefer vector promotion here because if vector promotion is viable
5215 // and there is a common type used, then it implies the second listed
5216 // condition for preferring vector promotion is true.
5217 if (VecTy) {
5218 LogSelection("common-type-vecty", VecTy, VecTy, false);
5219 return {VecTy, false, VecTy};
5220 }
5221 bool IntWiden = isIntegerWideningViable(P, CommonUseTy, DL);
5222 LogSelection("common-type", CommonUseTy, nullptr, IntWiden);
5223 return {CommonUseTy, IntWiden, nullptr};
5224 }
5225 }
5226
5227 // Can we find an appropriate subtype in the original allocated
5228 // type?
5229 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
5230 P.beginOffset(), P.size())) {
5231 // If the partition is an integer array that can be spanned by a legal
5232 // integer type, prefer to represent it as a legal integer type because
5233 // it's more likely to be promotable.
5234 if (TypePartitionTy->isArrayTy() &&
5235 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5236 DL.isLegalInteger(P.size() * 8))
5237 TypePartitionTy = Type::getIntNTy(C, P.size() * 8);
5238 // There was no common type used, so we prefer integer widening promotion.
5239 if (isIntegerWideningViable(P, TypePartitionTy, DL)) {
5240 LogSelection("type-partition-int-widen", TypePartitionTy, nullptr, true);
5241 return {TypePartitionTy, true, nullptr};
5242 }
5243 if (VecTy) {
5244 LogSelection("type-partition-vecty", VecTy, VecTy, false);
5245 return {VecTy, false, VecTy};
5246 }
5247 // If we couldn't promote with TypePartitionTy, try with the largest
5248 // integer type used.
5249 if (LargestIntTy &&
5250 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
5251 isIntegerWideningViable(P, LargestIntTy, DL)) {
5252 LogSelection("largest-int-int-widen", LargestIntTy, nullptr, true);
5253 return {LargestIntTy, true, nullptr};
5254 }
5255
5256 // Try homogeneous struct to vector canonicalization when requested. Running
5257 // this too early can hide memcpy chains from MemCpyOpt.
5258 if (AggregateToVector) {
5259 if (auto *STy = dyn_cast<StructType>(TypePartitionTy)) {
5260 if (auto *VTy = tryCanonicalizeStructToVector(STy, P, DL)) {
5261 LogSelection("struct-fallback-vecty", VTy, nullptr, false);
5262 return {VTy, false, nullptr};
5263 }
5264 }
5265 }
5266
5267 // Fallback to TypePartitionTy and we probably won't promote.
5268 LogSelection("type-partition-fallback", TypePartitionTy, nullptr, false);
5269 return {TypePartitionTy, false, nullptr};
5270 }
5271
5272 // Select the largest integer type used if it spans the partition.
5273 if (LargestIntTy &&
5274 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size()) {
5275 LogSelection("largest-int-fallback", LargestIntTy, nullptr, false);
5276 return {LargestIntTy, false, nullptr};
5277 }
5278
5279 // Select a legal integer type if it spans the partition.
5280 if (DL.isLegalInteger(P.size() * 8)) {
5281 Type *IntTy = Type::getIntNTy(C, P.size() * 8);
5282 LogSelection("legal-int-fallback", IntTy, nullptr, false);
5283 return {IntTy, false, nullptr};
5284 }
5285
5286 // Fallback to an i8 array.
5287 Type *ArrayTy = ArrayType::get(Type::getInt8Ty(C), P.size());
5288 LogSelection("byte-array-fallback", ArrayTy, nullptr, false);
5289 return {ArrayTy, false, nullptr};
5290}
5291
5292/// Rewrite an alloca partition's users.
5293///
5294/// This routine drives both of the rewriting goals of the SROA pass. It tries
5295/// to rewrite uses of an alloca partition to be conducive for SSA value
5296/// promotion. If the partition needs a new, more refined alloca, this will
5297/// build that new alloca, preserving as much type information as possible, and
5298/// rewrite the uses of the old alloca to point at the new one and have the
5299/// appropriate new offsets. It also evaluates how successful the rewrite was
5300/// at enabling promotion and if it was successful queues the alloca to be
5301/// promoted.
5302std::pair<AllocaInst *, uint64_t>
5303SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P) {
5304 const DataLayout &DL = AI.getDataLayout();
5305 // Select the type for the new alloca that spans the partition.
5306 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5307 selectPartitionType(P, DL, AI, *C, AggregateToVector);
5308
5309 // Check for the case where we're going to rewrite to a new alloca of the
5310 // exact same type as the original, and with the same access offsets. In that
5311 // case, re-use the existing alloca, but still run through the rewriter to
5312 // perform phi and select speculation.
5313 // P.beginOffset() can be non-zero even with the same type in a case with
5314 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
5315 AllocaInst *NewAI;
5316 if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5317 NewAI = &AI;
5318 // FIXME: We should be able to bail at this point with "nothing changed".
5319 // FIXME: We might want to defer PHI speculation until after here.
5320 // FIXME: return nullptr;
5321 } else {
5322 // Make sure the alignment is compatible with P.beginOffset().
5323 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
5324 // If we will get at least this much alignment from the type alone, leave
5325 // the alloca's alignment unconstrained.
5326 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
5327 NewAI = new AllocaInst(
5328 PartitionTy, AI.getAddressSpace(), nullptr,
5329 IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
5330 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5331 AI.getIterator());
5332 // Copy the old AI debug location over to the new one.
5333 NewAI->setDebugLoc(AI.getDebugLoc());
5334 ++NumNewAllocas;
5335 }
5336
5337 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5338 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5339
5340 // Track the high watermark on the worklist as it is only relevant for
5341 // promoted allocas. We will reset it to this point if the alloca is not in
5342 // fact scheduled for promotion.
5343 unsigned PPWOldSize = PostPromotionWorklist.size();
5344 unsigned NumUses = 0;
5345 SmallSetVector<PHINode *, 8> PHIUsers;
5346 SmallSetVector<SelectInst *, 8> SelectUsers;
5347
5348 AllocaSliceRewriter Rewriter(
5349 DL, AS, *this, AI, *NewAI, PartitionTy, P.beginOffset(), P.endOffset(),
5350 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5351 bool Promotable = true;
5352 // Check whether we can have tree-structured merge.
5353 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5354 NumUses += DeletedValues->size() + 1;
5355 for (Value *V : *DeletedValues)
5356 DeadInsts.push_back(V);
5357 } else {
5358 for (Slice *S : P.splitSliceTails()) {
5359 Promotable &= Rewriter.visit(S);
5360 ++NumUses;
5361 }
5362 for (Slice &S : P) {
5363 Promotable &= Rewriter.visit(&S);
5364 ++NumUses;
5365 }
5366 }
5367
5368 NumAllocaPartitionUses += NumUses;
5369 MaxUsesPerAllocaPartition.updateMax(NumUses);
5370
5371 // Now that we've processed all the slices in the new partition, check if any
5372 // PHIs or Selects would block promotion.
5373 for (PHINode *PHI : PHIUsers)
5374 if (!isSafePHIToSpeculate(*PHI)) {
5375 Promotable = false;
5376 PHIUsers.clear();
5377 SelectUsers.clear();
5378 break;
5379 }
5380
5382 NewSelectsToRewrite;
5383 NewSelectsToRewrite.reserve(SelectUsers.size());
5384 for (SelectInst *Sel : SelectUsers) {
5385 std::optional<RewriteableMemOps> Ops =
5386 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5387 if (!Ops) {
5388 Promotable = false;
5389 PHIUsers.clear();
5390 SelectUsers.clear();
5391 NewSelectsToRewrite.clear();
5392 break;
5393 }
5394 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5395 }
5396
5397 if (Promotable) {
5398 for (Use *U : AS.getDeadUsesIfPromotable()) {
5399 auto *OldInst = dyn_cast<Instruction>(U->get());
5400 Value::dropDroppableUse(*U);
5401 if (OldInst)
5402 if (isInstructionTriviallyDead(OldInst))
5403 DeadInsts.push_back(OldInst);
5404 }
5405 if (PHIUsers.empty() && SelectUsers.empty()) {
5406 // Promote the alloca.
5407 PromotableAllocas.insert(NewAI);
5408 } else {
5409 // If we have either PHIs or Selects to speculate, add them to those
5410 // worklists and re-queue the new alloca so that we promote it on the
5411 // next iteration.
5412 SpeculatablePHIs.insert_range(PHIUsers);
5413 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5414 NewSelectsToRewrite.size());
5415 for (auto &&KV : llvm::make_range(
5416 std::make_move_iterator(NewSelectsToRewrite.begin()),
5417 std::make_move_iterator(NewSelectsToRewrite.end())))
5418 SelectsToRewrite.insert(std::move(KV));
5419 Worklist.insert(NewAI);
5420 }
5421 } else {
5422 // Drop any post-promotion work items if promotion didn't happen.
5423 while (PostPromotionWorklist.size() > PPWOldSize)
5424 PostPromotionWorklist.pop_back();
5425
5426 // We couldn't promote and we didn't create a new partition, nothing
5427 // happened.
5428 if (NewAI == &AI)
5429 return {nullptr, 0};
5430
5431 // If we can't promote the alloca, iterate on it to check for new
5432 // refinements exposed by splitting the current alloca. Don't iterate on an
5433 // alloca which didn't actually change and didn't get promoted.
5434 Worklist.insert(NewAI);
5435 }
5436
5437 return {NewAI, DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5438}
5439
5440// There isn't a shared interface to get the "address" parts out of a
5441// dbg.declare and dbg.assign, so provide some wrappers.
5444 return DVR->isKillAddress();
5445 return DVR->isKillLocation();
5446}
5447
5450 return DVR->getAddressExpression();
5451 return DVR->getExpression();
5452}
5453
5454/// Create or replace an existing fragment in a DIExpression with \p Frag.
5455/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5456/// operation, add \p BitExtractOffset to the offset part.
5457///
5458/// Returns the new expression, or nullptr if this fails (see details below).
5459///
5460/// This function is similar to DIExpression::createFragmentExpression except
5461/// for 3 important distinctions:
5462/// 1. The new fragment isn't relative to an existing fragment.
5463/// 2. It assumes the computed location is a memory location. This means we
5464/// don't need to perform checks that creating the fragment preserves the
5465/// expression semantics.
5466/// 3. Existing extract_bits are modified independently of fragment changes
5467/// using \p BitExtractOffset. A change to the fragment offset or size
5468/// may affect a bit extract. But a bit extract offset can change
5469/// independently of the fragment dimensions.
5470///
5471/// Returns the new expression, or nullptr if one couldn't be created.
5472/// Ideally this is only used to signal that a bit-extract has become
5473/// zero-sized (and thus the new debug record has no size and can be
5474/// dropped), however, it fails for other reasons too - see the FIXME below.
5475///
5476/// FIXME: To keep the change that introduces this function NFC it bails
5477/// in some situations unnecessarily, e.g. when fragment and bit extract
5478/// sizes differ.
5481 int64_t BitExtractOffset) {
5483 bool HasFragment = false;
5484 bool HasBitExtract = false;
5485
5486 for (auto &Op : Expr->expr_ops()) {
5487 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5488 HasFragment = true;
5489 continue;
5490 }
5491 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5493 HasBitExtract = true;
5494 int64_t ExtractOffsetInBits = Op.getArg(0);
5495 int64_t ExtractSizeInBits = Op.getArg(1);
5496
5497 // DIExpression::createFragmentExpression doesn't know how to handle
5498 // a fragment that is smaller than the extract. Copy the behaviour
5499 // (bail) to avoid non-NFC changes.
5500 // FIXME: Don't do this.
5501 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5502 return nullptr;
5503
5504 assert(BitExtractOffset <= 0);
5505 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5506
5507 // DIExpression::createFragmentExpression doesn't know what to do
5508 // if the new extract starts "outside" the existing one. Copy the
5509 // behaviour (bail) to avoid non-NFC changes.
5510 // FIXME: Don't do this.
5511 if (AdjustedOffset < 0)
5512 return nullptr;
5513
5514 Ops.push_back(Op.getOp());
5515 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5516 Ops.push_back(ExtractSizeInBits);
5517 continue;
5518 }
5519 Op.appendToVector(Ops);
5520 }
5521
5522 // Unsupported by createFragmentExpression, so don't support it here yet to
5523 // preserve NFC-ness.
5524 if (HasFragment && HasBitExtract)
5525 return nullptr;
5526
5527 if (!HasBitExtract) {
5529 Ops.push_back(Frag.OffsetInBits);
5530 Ops.push_back(Frag.SizeInBits);
5531 }
5532 return DIExpression::get(Expr->getContext(), Ops);
5533}
5534
5535/// Insert a new DbgRecord.
5536/// \p Orig Original to copy record type, debug loc and variable from, and
5537/// additionally value and value expression for dbg_assign records.
5538/// \p NewAddr Location's new base address.
5539/// \p NewAddrExpr New expression to apply to address.
5540/// \p BeforeInst Insert position.
5541/// \p NewFragment New fragment (absolute, non-relative).
5542/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5543static void
5545 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5546 std::optional<DIExpression::FragmentInfo> NewFragment,
5547 int64_t BitExtractAdjustment) {
5548 (void)DIB;
5549
5550 // A dbg_assign puts fragment info in the value expression only. The address
5551 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5552 // new fragment info into NewAddrExpr (as it only has one expression).
5553 DIExpression *NewFragmentExpr =
5554 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5555 if (NewFragment)
5556 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5557 BitExtractAdjustment);
5558 if (!NewFragmentExpr)
5559 return;
5560
5561 if (Orig->isDbgDeclare()) {
5563 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5564 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5565 BeforeInst->getIterator());
5566 return;
5567 }
5568
5569 if (Orig->isDbgValue()) {
5571 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5572 // Drop debug information if the expression doesn't start with a
5573 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
5574 // describes the address of alloca rather than the value inside the alloca.
5575 if (!NewFragmentExpr->startsWithDeref())
5576 DVR->setKillAddress();
5577 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5578 BeforeInst->getIterator());
5579 return;
5580 }
5581
5582 // Apply a DIAssignID to the store if it doesn't already have it.
5583 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5584 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5586 }
5587
5589 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5590 NewAddrExpr, Orig->getDebugLoc());
5591 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5592 (void)NewAssign;
5593}
5594
5595/// Walks the slices of an alloca and forms partitions based on them,
5596/// rewriting each of their uses.
5597bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5598 if (AS.begin() == AS.end())
5599 return false;
5600
5601 unsigned NumPartitions = 0;
5602 bool Changed = false;
5603 const DataLayout &DL = AI.getModule()->getDataLayout();
5604
5605 // First try to pre-split loads and stores.
5606 Changed |= presplitLoadsAndStores(AI, AS);
5607
5608 // Now that we have identified any pre-splitting opportunities,
5609 // mark loads and stores unsplittable except for the following case.
5610 // We leave a slice splittable if all other slices are disjoint or fully
5611 // included in the slice, such as whole-alloca loads and stores.
5612 // If we fail to split these during pre-splitting, we want to force them
5613 // to be rewritten into a partition.
5614 bool IsSorted = true;
5615
5616 uint64_t AllocaSize = AI.getAllocationSize(DL)->getFixedValue();
5617 const uint64_t MaxBitVectorSize = 1024;
5618 if (AllocaSize <= MaxBitVectorSize) {
5619 // If a byte boundary is included in any load or store, a slice starting or
5620 // ending at the boundary is not splittable.
5621 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5622 for (Slice &S : AS)
5623 for (unsigned O = S.beginOffset() + 1;
5624 O < S.endOffset() && O < AllocaSize; O++)
5625 SplittableOffset.reset(O);
5626
5627 for (Slice &S : AS) {
5628 if (!S.isSplittable())
5629 continue;
5630
5631 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5632 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5633 continue;
5634
5635 if (isa<LoadInst>(S.getUse()->getUser()) ||
5636 isa<StoreInst>(S.getUse()->getUser())) {
5637 S.makeUnsplittable();
5638 IsSorted = false;
5639 }
5640 }
5641 } else {
5642 // We only allow whole-alloca splittable loads and stores
5643 // for a large alloca to avoid creating too large BitVector.
5644 for (Slice &S : AS) {
5645 if (!S.isSplittable())
5646 continue;
5647
5648 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5649 continue;
5650
5651 if (isa<LoadInst>(S.getUse()->getUser()) ||
5652 isa<StoreInst>(S.getUse()->getUser())) {
5653 S.makeUnsplittable();
5654 IsSorted = false;
5655 }
5656 }
5657 }
5658
5659 if (!IsSorted)
5661
5662 /// Describes the allocas introduced by rewritePartition in order to migrate
5663 /// the debug info.
5664 struct Fragment {
5665 AllocaInst *Alloca;
5666 uint64_t Offset;
5667 uint64_t Size;
5668 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5669 : Alloca(AI), Offset(O), Size(S) {}
5670 };
5671 SmallVector<Fragment, 4> Fragments;
5672
5673 // Rewrite each partition.
5674 for (auto &P : AS.partitions()) {
5675 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5676 if (NewAI) {
5677 Changed = true;
5678 if (NewAI != &AI) {
5679 uint64_t SizeOfByte = 8;
5680 // Don't include any padding.
5681 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5682 Fragments.push_back(
5683 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5684 }
5685 }
5686 ++NumPartitions;
5687 }
5688
5689 NumAllocaPartitions += NumPartitions;
5690 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5691
5692 // Migrate debug information from the old alloca to the new alloca(s)
5693 // and the individual partitions.
5694 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5695 // Can't overlap with undef memory.
5696 if (isKillAddress(DbgVariable))
5697 return;
5698
5699 const Value *DbgPtr = DbgVariable->getAddress();
5701 DbgVariable->getFragmentOrEntireVariable();
5702 // Get the address expression constant offset if one exists and the ops
5703 // that come after it.
5704 int64_t CurrentExprOffsetInBytes = 0;
5705 SmallVector<uint64_t> PostOffsetOps;
5706 if (!getAddressExpression(DbgVariable)
5707 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5708 return; // Couldn't interpret this DIExpression - drop the var.
5709
5710 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5711 int64_t ExtractOffsetInBits = 0;
5712 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5713 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5715 ExtractOffsetInBits = Op.getArg(0);
5716 break;
5717 }
5718 }
5719
5720 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5721 for (auto Fragment : Fragments) {
5722 int64_t OffsetFromLocationInBits;
5723 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5724 // Find the variable fragment that the new alloca slice covers.
5725 // Drop debug info for this variable fragment if we can't compute an
5726 // intersect between it and the alloca slice.
5728 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5729 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5730 NewDbgFragment, OffsetFromLocationInBits))
5731 continue; // Do not migrate this fragment to this slice.
5732
5733 // Zero sized fragment indicates there's no intersect between the variable
5734 // fragment and the alloca slice. Skip this slice for this variable
5735 // fragment.
5736 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5737 continue; // Do not migrate this fragment to this slice.
5738
5739 // No fragment indicates DbgVariable's variable or fragment exactly
5740 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5741 if (!NewDbgFragment)
5742 NewDbgFragment = DbgVariable->getFragment();
5743
5744 // Reduce the new expression offset by the bit-extract offset since
5745 // we'll be keeping that.
5746 int64_t OffestFromNewAllocaInBits =
5747 OffsetFromLocationInBits - ExtractOffsetInBits;
5748 // We need to adjust an existing bit extract if the offset expression
5749 // can't eat the slack (i.e., if the new offset would be negative).
5750 int64_t BitExtractOffset =
5751 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5752 // The magnitude of a negative value indicates the number of bits into
5753 // the existing variable fragment that the memory region begins. The new
5754 // variable fragment already excludes those bits - the new DbgPtr offset
5755 // only needs to be applied if it's positive.
5756 OffestFromNewAllocaInBits =
5757 std::max(int64_t(0), OffestFromNewAllocaInBits);
5758
5759 // Rebuild the expression:
5760 // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5761 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5762 // address expression but the value expression instead.
5763 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5764 if (OffestFromNewAllocaInBits > 0) {
5765 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5766 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5767 }
5768
5769 // Remove any existing intrinsics on the new alloca describing
5770 // the variable fragment.
5771 auto RemoveOne = [DbgVariable](auto *OldDII) {
5772 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5773 return LHS->getVariable() == RHS->getVariable() &&
5774 LHS->getDebugLoc()->getInlinedAt() ==
5775 RHS->getDebugLoc()->getInlinedAt();
5776 };
5777 if (SameVariableFragment(OldDII, DbgVariable))
5778 OldDII->eraseFromParent();
5779 };
5780 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5781 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5782 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5783 NewDbgFragment, BitExtractOffset);
5784 }
5785 };
5786
5787 // Migrate debug information from the old alloca to the new alloca(s)
5788 // and the individual partitions.
5789 for_each(findDVRDeclares(&AI), MigrateOne);
5790 for_each(findDVRValues(&AI), MigrateOne);
5791 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5792
5793 return Changed;
5794}
5795
5796/// Clobber a use with poison, deleting the used value if it becomes dead.
5797void SROA::clobberUse(Use &U) {
5798 Value *OldV = U;
5799 // Replace the use with an poison value.
5800 U = PoisonValue::get(OldV->getType());
5801
5802 // Check for this making an instruction dead. We have to garbage collect
5803 // all the dead instructions to ensure the uses of any alloca end up being
5804 // minimal.
5805 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5806 if (isInstructionTriviallyDead(OldI)) {
5807 DeadInsts.push_back(OldI);
5808 }
5809}
5810
5811/// A basic LoadAndStorePromoter that does not remove store nodes.
5813public:
5815 Type *ZeroType)
5816 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5817 bool shouldDelete(Instruction *I) const override {
5818 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5819 }
5820
5822 return UndefValue::get(ZeroType);
5823 }
5824
5825private:
5826 Type *ZeroType;
5827};
5828
5829bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5830 // Look through each "partition", looking for slices with the same start/end
5831 // that do not overlap with any before them. The slices are sorted by
5832 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5833 // sophisticated algorithm that takes splittable slices into account.
5834 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5835 bool AllSameAndValid = true;
5836 Type *PartitionType = nullptr;
5838 uint64_t BeginOffset = 0;
5839 uint64_t EndOffset = 0;
5840
5841 auto Flush = [&]() {
5842 if (AllSameAndValid && !Insts.empty()) {
5843 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5844 << EndOffset << ")\n");
5846 SSAUpdater SSA(&NewPHIs);
5847 Insts.push_back(&AI);
5848 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5849 Promoter.run(Insts);
5850 }
5851 AllSameAndValid = true;
5852 PartitionType = nullptr;
5853 Insts.clear();
5854 };
5855
5856 for (Slice &S : AS) {
5857 auto *User = cast<Instruction>(S.getUse()->getUser());
5858 if (isAssumeLikeIntrinsic(User)) {
5859 LLVM_DEBUG({
5860 dbgs() << "Ignoring slice: ";
5861 AS.print(dbgs(), &S);
5862 });
5863 continue;
5864 }
5865 if (S.beginOffset() >= EndOffset) {
5866 Flush();
5867 BeginOffset = S.beginOffset();
5868 EndOffset = S.endOffset();
5869 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5870 if (AllSameAndValid) {
5871 LLVM_DEBUG({
5872 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5873 << EndOffset << ")";
5874 AS.print(dbgs(), &S);
5875 });
5876 AllSameAndValid = false;
5877 }
5878 EndOffset = std::max(EndOffset, S.endOffset());
5879 continue;
5880 }
5881
5882 if (auto *LI = dyn_cast<LoadInst>(User)) {
5883 Type *UserTy = LI->getType();
5884 // LoadAndStorePromoter requires all the types to be the same.
5885 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5886 AllSameAndValid = false;
5887 PartitionType = UserTy;
5888 Insts.push_back(User);
5889 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5890 Type *UserTy = SI->getValueOperand()->getType();
5891 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5892 AllSameAndValid = false;
5893 PartitionType = UserTy;
5894 Insts.push_back(User);
5895 } else {
5896 AllSameAndValid = false;
5897 }
5898 }
5899
5900 Flush();
5901 return true;
5902}
5903
5904/// Analyze an alloca for SROA.
5905///
5906/// This analyzes the alloca to ensure we can reason about it, builds
5907/// the slices of the alloca, and then hands it off to be split and
5908/// rewritten as needed.
5909std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5910SROA::runOnAlloca(AllocaInst &AI) {
5911 bool Changed = false;
5912 bool CFGChanged = false;
5913
5914 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5915 ++NumAllocasAnalyzed;
5916
5917 // Special case dead allocas, as they're trivial.
5918 if (AI.use_empty()) {
5919 AI.eraseFromParent();
5920 Changed = true;
5921 return {Changed, CFGChanged};
5922 }
5923 const DataLayout &DL = AI.getDataLayout();
5924
5925 // Skip alloca forms that this analysis can't handle.
5926 std::optional<TypeSize> Size = AI.getAllocationSize(DL);
5927 if (AI.isArrayAllocation() || !Size || Size->isScalable() || Size->isZero())
5928 return {Changed, CFGChanged};
5929
5930 // First, split any FCA loads and stores touching this alloca to promote
5931 // better splitting and promotion opportunities.
5932 IRBuilderTy IRB(&AI);
5933 AggLoadStoreRewriter AggRewriter(DL, IRB);
5934 Changed |= AggRewriter.rewrite(AI);
5935
5936 // Build the slices using a recursive instruction-visiting builder.
5937 AllocaSlices AS(DL, AI);
5938 LLVM_DEBUG(AS.print(dbgs()));
5939 if (AS.isEscaped())
5940 return {Changed, CFGChanged};
5941
5942 if (AS.isEscapedReadOnly()) {
5943 Changed |= propagateStoredValuesToLoads(AI, AS);
5944 return {Changed, CFGChanged};
5945 }
5946
5947 // Delete all the dead users of this alloca before splitting and rewriting it.
5948 for (Instruction *DeadUser : AS.getDeadUsers()) {
5949 // Free up everything used by this instruction.
5950 for (Use &DeadOp : DeadUser->operands())
5951 clobberUse(DeadOp);
5952
5953 // Now replace the uses of this instruction.
5954 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5955
5956 // And mark it for deletion.
5957 DeadInsts.push_back(DeadUser);
5958 Changed = true;
5959 }
5960 for (Use *DeadOp : AS.getDeadOperands()) {
5961 clobberUse(*DeadOp);
5962 Changed = true;
5963 }
5964
5965 // No slices to split. Leave the dead alloca for a later pass to clean up.
5966 if (AS.begin() == AS.end())
5967 return {Changed, CFGChanged};
5968
5969 Changed |= splitAlloca(AI, AS);
5970
5971 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
5972 while (!SpeculatablePHIs.empty())
5973 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5974
5975 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
5976 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5977 while (!RemainingSelectsToRewrite.empty()) {
5978 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5979 CFGChanged |=
5980 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5981 }
5982
5983 return {Changed, CFGChanged};
5984}
5985
5986/// Delete the dead instructions accumulated in this run.
5987///
5988/// Recursively deletes the dead instructions we've accumulated. This is done
5989/// at the very end to maximize locality of the recursive delete and to
5990/// minimize the problems of invalidated instruction pointers as such pointers
5991/// are used heavily in the intermediate stages of the algorithm.
5992///
5993/// We also record the alloca instructions deleted here so that they aren't
5994/// subsequently handed to mem2reg to promote.
5995bool SROA::deleteDeadInstructions(
5996 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5997 bool Changed = false;
5998 while (!DeadInsts.empty()) {
5999 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
6000 if (!I)
6001 continue;
6002 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6003
6004 // If the instruction is an alloca, find the possible dbg.declare connected
6005 // to it, and remove it too. We must do this before calling RAUW or we will
6006 // not be able to find it.
6007 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6008 DeletedAllocas.insert(AI);
6009 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
6010 OldDII->eraseFromParent();
6011 }
6012
6014 I->replaceAllUsesWith(UndefValue::get(I->getType()));
6015
6016 for (Use &Operand : I->operands())
6017 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
6018 // Zero out the operand and see if it becomes trivially dead.
6019 Operand = nullptr;
6021 DeadInsts.push_back(U);
6022 }
6023
6024 ++NumDeleted;
6025 I->eraseFromParent();
6026 Changed = true;
6027 }
6028 return Changed;
6029}
6030/// Promote the allocas, using the best available technique.
6031///
6032/// This attempts to promote whatever allocas have been identified as viable in
6033/// the PromotableAllocas list. If that list is empty, there is nothing to do.
6034/// This function returns whether any promotion occurred.
6035bool SROA::promoteAllocas() {
6036 if (PromotableAllocas.empty())
6037 return false;
6038
6039 if (SROASkipMem2Reg) {
6040 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
6041 } else {
6042 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
6043 NumPromoted += PromotableAllocas.size();
6044 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6045 }
6046
6047 PromotableAllocas.clear();
6048 return true;
6049}
6050
6051std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
6052 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
6053
6054 const DataLayout &DL = F.getDataLayout();
6055 BasicBlock &EntryBB = F.getEntryBlock();
6056 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
6057 I != E; ++I) {
6058 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6059 std::optional<TypeSize> Size = AI->getAllocationSize(DL);
6060 if (Size && Size->isScalable() && isAllocaPromotable(AI))
6061 PromotableAllocas.insert(AI);
6062 else
6063 Worklist.insert(AI);
6064 }
6065 }
6066
6067 bool Changed = false;
6068 bool CFGChanged = false;
6069 // A set of deleted alloca instruction pointers which should be removed from
6070 // the list of promotable allocas.
6071 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6072
6073 do {
6074 while (!Worklist.empty()) {
6075 auto [IterationChanged, IterationCFGChanged] =
6076 runOnAlloca(*Worklist.pop_back_val());
6077 Changed |= IterationChanged;
6078 CFGChanged |= IterationCFGChanged;
6079
6080 Changed |= deleteDeadInstructions(DeletedAllocas);
6081
6082 // Remove the deleted allocas from various lists so that we don't try to
6083 // continue processing them.
6084 if (!DeletedAllocas.empty()) {
6085 Worklist.set_subtract(DeletedAllocas);
6086 PostPromotionWorklist.set_subtract(DeletedAllocas);
6087 PromotableAllocas.set_subtract(DeletedAllocas);
6088 DeletedAllocas.clear();
6089 }
6090 }
6091
6092 Changed |= promoteAllocas();
6093
6094 Worklist = PostPromotionWorklist;
6095 PostPromotionWorklist.clear();
6096 } while (!Worklist.empty());
6097
6098 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6099 assert((!CFGChanged || !PreserveCFG) &&
6100 "Should not have modified the CFG when told to preserve it.");
6101
6102 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
6103 for (auto &BB : F) {
6105 }
6106 }
6107
6108 return {Changed, CFGChanged};
6109}
6110
// NOTE(review): the opening of this definition is absent from this listing
// (source lines 6111-6113), so F, DT, AC and Options have no visible
// declaration here. Source lines 6119 and 6121-6122 are absent as well, which
// leaves PA undeclared and the `if (!CFGChanged)` below without a statement of
// its own. Restore those lines from the original file before relying on this
// text.
//
// The dominator tree is wrapped in a lazy DomTreeUpdater for the SROA run.
6114 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// runSROA reports whether anything changed and whether the CFG changed.
6115 auto [Changed, CFGChanged] =
6116 SROA(&F.getContext(), &DTU, &AC, Options).runSROA(F);
// Nothing was modified, so all analyses are reported as preserved.
6117 if (!Changed)
6118 return PreservedAnalyses::all();
6120 if (!CFGChanged)
6123 return PA;
6124}
6125
6127 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6128 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6129 OS, MapClassName2PassName);
6130 OS << '<'
6131 << (Options.CFG == SROAOptions::PreserveCFG ? "preserve-cfg"
6132 : "modify-cfg");
6133 if (Options.AggregateToVector)
6134 OS << ";aggregate-to-vector";
6135 OS << '>';
6136}
6137
6138SROAPass::SROAPass(SROAOptions Options) : Options(Options) {}
6139
6140namespace {
6141
6142/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
// NOTE(review): source lines 6144 and 6149-6151 are absent from this listing.
// They presumably held the declaration of the Options member read by
// runOnFunction and the head of the constructor whose closing brace remains
// at source line 6152 - confirm against the original file.
6143class SROALegacyPass : public FunctionPass {
6145
6146public:
6147 static char ID;
6148
6152 }
6153
// Run SROA on F. Returns false immediately when skipFunction(F) is true.
// Otherwise fetches the dominator tree and assumption cache, runs SROA with a
// lazy DomTreeUpdater, and returns whether anything changed. The CFG-changed
// half of runSROA's result is discarded.
6154 bool runOnFunction(Function &F) override {
6155 if (skipFunction(F))
6156 return false;
6157
6158 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6159 AssumptionCache &AC =
6160 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6161 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6162 auto [Changed, _] = SROA(&F.getContext(), &DTU, &AC, Options).runSROA(F);
6163 return Changed;
6164 }
6165
// Requires the assumption cache and dominator tree; declares globals AA and
// the dominator tree as preserved.
6166 void getAnalysisUsage(AnalysisUsage &AU) const override {
6167 AU.addRequired<AssumptionCacheTracker>();
6168 AU.addRequired<DominatorTreeWrapperPass>();
6169 AU.addPreserved<GlobalsAAWrapperPass>();
6170 AU.addPreserved<DominatorTreeWrapperPass>();
6171 }
6172
// The pass name reported for this pass.
6173 StringRef getPassName() const override { return "SROA"; }
6174};
6175
6176} // end anonymous namespace
6177
6178char SROALegacyPass::ID = 0;
6179
/// Create a new SROALegacyPass. SROAOptions::PreserveCFG is selected when
/// \p PreserveCFG is true, and \p AggregateToVector is forwarded as the second
/// SROAOptions argument.
// NOTE(review): the line carrying the false branch of the conditional (source
// line 6182) is absent from this listing; presumably it names the
// CFG-modifying option - restore it from the original file.
6180FunctionPass *llvm::createSROAPass(bool PreserveCFG, bool AggregateToVector) {
6181 return new SROALegacyPass(SROAOptions(PreserveCFG ? SROAOptions::PreserveCFG
6183 AggregateToVector));
6184}
6185
6186INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6187 "Scalar Replacement Of Aggregates", false, false)
6190INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
Flatten the CFG
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
print mir2vec MIR2Vec Vocabulary Printer Pass
Definition MIR2Vec.cpp:598
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
static std::optional< uint64_t > getSizeInBytes(std::optional< uint64_t > SizeInBits)
Memory SSA
Definition MemorySSA.cpp:72
This file contains the declarations for metadata subclasses.
#define T
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
Definition SROA.cpp:345
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
Definition SROA.cpp:2242
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
Definition SROA.cpp:1920
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
Definition SROA.cpp:2093
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
Definition SROA.cpp:1486
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
Definition SROA.cpp:4448
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
Definition SROA.cpp:280
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
Definition SROA.cpp:5479
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
Definition SROA.cpp:2485
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
Definition SROA.cpp:2018
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
Definition SROA.cpp:1909
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
Definition SROA.cpp:2324
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
Definition SROA.cpp:2518
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
Definition SROA.cpp:1008
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
Definition SROA.cpp:1875
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
Definition SROA.cpp:1808
static Value * foldSelectInst(SelectInst &SI)
Definition SROA.cpp:995
bool isKillAddress(const DbgVariableRecord *DVR)
Definition SROA.cpp:5442
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
Definition SROA.cpp:2540
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
Definition SROA.cpp:2419
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
Definition SROA.cpp:1626
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
Definition SROA.cpp:2198
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
Definition SROA.cpp:326
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C, bool AggregateToVector)
Select a partition type for an alloca partition.
Definition SROA.cpp:5164
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
Definition SROA.cpp:1552
static FixedVectorType * tryCanonicalizeStructToVector(StructType *STy, Partition &P, const DataLayout &DL)
Try to canonicalize a homogeneous struct partition to a vector type.
Definition SROA.cpp:5113
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
Definition SROA.cpp:2460
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
Definition SROA.cpp:5544
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
Definition SROA.cpp:1769
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
Definition SROA.cpp:2612
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
Definition SROA.cpp:5448
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
Definition SROA.cpp:4486
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
Definition SROA.cpp:1930
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
Definition SROA.cpp:1707
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Value * RHS
Value * LHS
Builder for the alloca slices.
Definition SROA.cpp:1020
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
Definition SROA.cpp:1036
An iterator over partitions of the alloca's slices.
Definition SROA.cpp:808
bool operator==(const partition_iterator &RHS) const
Definition SROA.cpp:955
partition_iterator & operator++()
Definition SROA.cpp:975
bool shouldDelete(Instruction *I) const override
Return false if a sub-class wants to keep one of the loads/stores after the SSA construction.
Definition SROA.cpp:5817
BasicLoadAndStorePromoter(ArrayRef< const Instruction * > Insts, SSAUpdater &S, Type *ZeroType)
Definition SROA.cpp:5814
Value * getValueToUseForAlloca(Instruction *I) const override
Return the value to use for the point in the code that the alloca is positioned.
Definition SROA.cpp:5821
Class for arbitrary precision integers.
Definition APInt.h:78
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
DWARF expression.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element is a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Definition DebugLoc.cpp:55
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Analysis pass which computes a DominatorTree.
Definition Dominators.h:274
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:310
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:155
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Definition Operator.cpp:125
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition IRBuilder.h:61
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition IRBuilder.h:65
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2868
Base class for instruction visitors.
Definition InstVisitor.h:78
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI LoadAndStorePromoter(ArrayRef< const Instruction * > Insts, SSAUpdater &S, StringRef Name=StringRef())
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1567
LLVMContext & getContext() const
Definition Metadata.h:1239
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
PtrUseVisitor(const DataLayout &DL)
LLVM_ABI SROAPass(SROAOptions Options)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Definition SROA.cpp:6138
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition SROA.cpp:6111
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition SROA.cpp:6126
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition SSAUpdater.h:39
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition StringRef.h:365
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition StringRef.h:290
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition DataLayout.h:743
TypeSize getSizeInBytes() const
Definition DataLayout.h:752
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
TypeSize getSizeInBits() const
Definition DataLayout.h:754
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:479
element_iterator element_end() const
ArrayRef< Type * > elements() const
element_iterator element_begin() const
bool isPacked() const
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:311
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:205
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
op_iterator op_begin()
Definition User.h:259
const Use & getOperandUse(unsigned i) const
Definition User.h:220
Value * getOperand(unsigned i) const
Definition User.h:207
op_iterator op_end()
Definition User.h:261
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition Value.cpp:823
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
Definition Value.cpp:214
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
Offsets
Offsets in bytes from the start of the input buffer.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
Definition Dwarf.h:151
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
Definition Dwarf.h:144
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
Definition Dwarf.h:150
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
bool empty() const
Definition BasicBlock.h:101
Context & getContext() const
Definition BasicBlock.h:99
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2142
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1457
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
void * PointerTy
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
bool capturesFullProvenance(CaptureComponents CC)
Definition ModRef.h:396
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition Loads.cpp:468
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
Definition DebugInfo.cpp:82
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true, bool AggregateToVector=false)
Definition SROA.cpp:6180
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
Definition DebugInfo.cpp:48
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2165
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define NDEBUG
Definition regutils.h:48
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
Definition Metadata.h:822
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Describes an element of a Bitfield.
Definition Bitfields.h:176
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89