LLVM 23.0.0git
EarlyCSE.cpp
Go to the documentation of this file.
1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs a simple dominator tree walk that eliminates trivially
10// redundant instructions.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Hashing.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <deque>
57#include <memory>
58#include <utility>
59
60using namespace llvm;
61using namespace llvm::PatternMatch;
62
63#define DEBUG_TYPE "early-cse"
64
65STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66STATISTIC(NumCSE, "Number of instructions CSE'd");
67STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
71STATISTIC(NumDSE, "Number of trivial dead stores removed");
72
73DEBUG_COUNTER(CSECounter, "early-cse",
74 "Controls which instructions are removed");
75
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
80
82 "earlycse-debug-hash", cl::init(false), cl::Hidden,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
85
86//===----------------------------------------------------------------------===//
87// SimpleValue
88//===----------------------------------------------------------------------===//
89
90namespace {
91
92/// Struct representing the available values in the scoped hash table.
93struct SimpleValue {
94 Instruction *Inst;
95
96 SimpleValue(Instruction *I) : Inst(I) {
97 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
98 }
99
100 bool isSentinel() const {
101 return Inst == DenseMapInfo<Instruction *>::getEmptyKey();
102 }
103
104 static bool canHandle(Instruction *Inst) {
105 // This can only handle non-void readnone functions.
106 // Also handled are constrained intrinsic that look like the types
107 // of instruction handled below (UnaryOperator, etc.).
108 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
109 if (Function *F = CI->getCalledFunction()) {
110 switch (F->getIntrinsicID()) {
111 case Intrinsic::experimental_constrained_fadd:
112 case Intrinsic::experimental_constrained_fsub:
113 case Intrinsic::experimental_constrained_fmul:
114 case Intrinsic::experimental_constrained_fdiv:
115 case Intrinsic::experimental_constrained_frem:
116 case Intrinsic::experimental_constrained_fptosi:
117 case Intrinsic::experimental_constrained_sitofp:
118 case Intrinsic::experimental_constrained_fptoui:
119 case Intrinsic::experimental_constrained_uitofp:
120 case Intrinsic::experimental_constrained_fcmp:
121 case Intrinsic::experimental_constrained_fcmps: {
122 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
123 if (CFP->getExceptionBehavior() &&
124 CFP->getExceptionBehavior() == fp::ebStrict)
125 return false;
126 // Since we CSE across function calls we must not allow
127 // the rounding mode to change.
128 if (CFP->getRoundingMode() &&
129 CFP->getRoundingMode() == RoundingMode::Dynamic)
130 return false;
131 return true;
132 }
133 }
134 }
135 return CI->doesNotAccessMemory() &&
136 // FIXME: Currently the calls which may access the thread id may
137 // be considered as not accessing the memory. But this is
138 // problematic for coroutines, since coroutines may resume in a
139 // different thread. So we disable the optimization here for the
140 // correctness. However, it may block many other correct
141 // optimizations. Revert this one when we detect the memory
142 // accessing kind more precisely.
143 !CI->getFunction()->isPresplitCoroutine();
144 }
145 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146 isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
150 isa<FreezeInst>(Inst);
151 }
152};
153
154} // end anonymous namespace
155
156template <> struct llvm::DenseMapInfo<SimpleValue> {
157 static inline SimpleValue getEmptyKey() {
159 }
160
161 static unsigned getHashValue(SimpleValue Val);
162 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
163};
164
165/// Match a 'select' including an optional 'not's of the condition.
167 Value *&B,
168 SelectPatternFlavor &Flavor) {
169 // Return false if V is not even a select.
170 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
171 return false;
172
173 // Look through a 'not' of the condition operand by swapping A/B.
174 Value *CondNot;
175 if (match(Cond, m_Not(m_Value(CondNot)))) {
176 Cond = CondNot;
177 std::swap(A, B);
178 }
179
180 // Match canonical forms of min/max. We are not using ValueTracking's
181 // more powerful matchSelectPattern() because it may rely on instruction flags
182 // such as "nsw". That would be incompatible with the current hashing
183 // mechanism that may remove flags to increase the likelihood of CSE.
184
185 Flavor = SPF_UNKNOWN;
186 CmpPredicate Pred;
187
188 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
189 // Check for commuted variants of min/max by swapping predicate.
190 // If we do not match the standard or commuted patterns, this is not a
191 // recognized form of min/max, but it is still a select, so return true.
192 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
193 return true;
195 }
196
197 switch (Pred) {
198 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
199 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
200 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
201 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
202 // Non-strict inequalities.
203 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
204 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
205 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
206 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
207 default: break;
208 }
209
210 return true;
211}
212
213static unsigned hashCallInst(CallInst *CI) {
214 // Don't CSE convergent calls in different basic blocks, because they
215 // implicitly depend on the set of threads that is currently executing.
216 if (CI->isConvergent()) {
217 return hash_combine(CI->getOpcode(), CI->getParent(),
219 }
220 return hash_combine(CI->getOpcode(),
222}
223
224static unsigned getHashValueImpl(SimpleValue Val) {
225 Instruction *Inst = Val.Inst;
226 // Hash in all of the operands as pointers.
227 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
228 Value *LHS = BinOp->getOperand(0);
229 Value *RHS = BinOp->getOperand(1);
230 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
231 std::swap(LHS, RHS);
232
233 return hash_combine(BinOp->getOpcode(), LHS, RHS);
234 }
235
236 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
237 // Compares can be commuted by swapping the comparands and
238 // updating the predicate. Choose the form that has the
239 // comparands in sorted order, or in the case of a tie, the
240 // one with the lower predicate.
241 Value *LHS = CI->getOperand(0);
242 Value *RHS = CI->getOperand(1);
243 CmpInst::Predicate Pred = CI->getPredicate();
244 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
245 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
246 std::swap(LHS, RHS);
247 Pred = SwappedPred;
248 }
249 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
250 }
251
252 // Hash general selects to allow matching commuted true/false operands.
254 Value *Cond, *A, *B;
255 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
256 // Hash min/max (cmp + select) to allow for commuted operands.
257 // Min/max may also have non-canonical compare predicate (eg, the compare for
258 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
259 // compare.
260 // TODO: We should also detect FP min/max.
261 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
262 SPF == SPF_UMIN || SPF == SPF_UMAX) {
263 if (A > B)
264 std::swap(A, B);
265 return hash_combine(Inst->getOpcode(), SPF, A, B);
266 }
267
268 // Hash general selects to allow matching commuted true/false operands.
269
270 // If we do not have a compare as the condition, just hash in the condition.
271 CmpPredicate Pred;
272 Value *X, *Y;
273 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
274 return hash_combine(Inst->getOpcode(), Cond, A, B);
275
276 // Similar to cmp normalization (above) - canonicalize the predicate value:
277 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
278 if (CmpInst::getInversePredicate(Pred) < Pred) {
279 Pred = CmpInst::getInversePredicate(Pred);
280 std::swap(A, B);
281 }
282 return hash_combine(Inst->getOpcode(),
283 static_cast<CmpInst::Predicate>(Pred), X, Y, A, B);
284 }
285
286 if (CastInst *CI = dyn_cast<CastInst>(Inst))
287 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
288
289 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
290 return hash_combine(FI->getOpcode(), FI->getOperand(0));
291
292 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
293 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
294 hash_combine_range(EVI->indices()));
295
296 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
297 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
298 IVI->getOperand(1), hash_combine_range(IVI->indices()));
299
302 isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
303 "Invalid/unknown instruction");
304
305 // Handle intrinsics with commutative operands.
306 auto *II = dyn_cast<IntrinsicInst>(Inst);
307 if (II && II->isCommutative() && II->arg_size() >= 2) {
308 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
309 if (LHS > RHS)
310 std::swap(LHS, RHS);
311 return hash_combine(
312 II->getOpcode(), LHS, RHS,
313 hash_combine_range(drop_begin(II->operand_values(), 2)));
314 }
315
316 // gc.relocate is 'special' call: its second and third operands are
317 // not real values, but indices into statepoint's argument list.
318 // Get values they point to.
319 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
320 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
321 GCR->getBasePtr(), GCR->getDerivedPtr());
322
323 // Don't CSE convergent calls in different basic blocks, because they
324 // implicitly depend on the set of threads that is currently executing.
325 if (CallInst *CI = dyn_cast<CallInst>(Inst))
326 return hashCallInst(CI);
327
328 // Mix in the opcode.
329 return hash_combine(Inst->getOpcode(),
331}
332
333unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
334#ifndef NDEBUG
335 // If -earlycse-debug-hash was specified, return a constant -- this
336 // will force all hashing to collide, so we'll exhaustively search
337 // the table for a match, and the assertion in isEqual will fire if
338 // there's a bug causing equal keys to hash differently.
340 return 0;
341#endif
342 return getHashValueImpl(Val);
343}
344
345static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
346 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
347
348 if (LHS.isSentinel() || RHS.isSentinel())
349 return LHSI == RHSI;
350
351 if (LHSI->getOpcode() != RHSI->getOpcode())
352 return false;
353 if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
354 // Convergent calls implicitly depend on the set of threads that is
355 // currently executing, so conservatively return false if they are in
356 // different basic blocks.
357 if (CallInst *CI = dyn_cast<CallInst>(LHSI);
358 CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
359 return false;
360
361 return true;
362 }
363
364 // If we're not strictly identical, we still might be a commutable instruction
365 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
366 if (!LHSBinOp->isCommutative())
367 return false;
368
370 "same opcode, but different instruction type?");
371 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
372
373 // Commuted equality
374 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
375 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
376 }
377 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
378 assert(isa<CmpInst>(RHSI) &&
379 "same opcode, but different instruction type?");
380 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
381 // Commuted equality
382 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
383 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
384 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
385 }
386
387 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
388 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
389 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
390 LII->isCommutative() && LII->arg_size() >= 2) {
391 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
392 LII->getArgOperand(1) == RII->getArgOperand(0) &&
393 std::equal(LII->arg_begin() + 2, LII->arg_end(),
394 RII->arg_begin() + 2, RII->arg_end()) &&
395 LII->hasSameSpecialState(RII, /*IgnoreAlignment=*/false,
396 /*IntersectAttrs=*/true);
397 }
398
399 // See comment above in `getHashValue()`.
400 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
401 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
402 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
403 GCR1->getBasePtr() == GCR2->getBasePtr() &&
404 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
405
406 // Min/max can occur with commuted operands, non-canonical predicates,
407 // and/or non-canonical operands.
408 // Selects can be non-trivially equivalent via inverted conditions and swaps.
409 SelectPatternFlavor LSPF, RSPF;
410 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
411 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
412 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
413 if (LSPF == RSPF) {
414 // TODO: We should also detect FP min/max.
415 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
416 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
417 return ((LHSA == RHSA && LHSB == RHSB) ||
418 (LHSA == RHSB && LHSB == RHSA));
419
420 // select Cond, A, B <--> select not(Cond), B, A
421 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
422 return true;
423 }
424
425 // If the true/false operands are swapped and the conditions are compares
426 // with inverted predicates, the selects are equal:
427 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
428 //
429 // This also handles patterns with a double-negation in the sense of not +
430 // inverse, because we looked through a 'not' in the matching function and
431 // swapped A/B:
432 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
433 //
434 // This intentionally does NOT handle patterns with a double-negation in
435 // the sense of not + not, because doing so could result in values
436 // comparing
437 // as equal that hash differently in the min/max cases like:
438 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
439 // ^ hashes as min ^ would not hash as min
440 // In the context of the EarlyCSE pass, however, such cases never reach
441 // this code, as we simplify the double-negation before hashing the second
442 // select (and so still succeed at CSEing them).
443 if (LHSA == RHSB && LHSB == RHSA) {
444 CmpPredicate PredL, PredR;
445 Value *X, *Y;
446 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
447 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
448 CmpInst::getInversePredicate(PredL) == PredR)
449 return true;
450 }
451 }
452
453 return false;
454}
455
456bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
457 // These comparisons are nontrivial, so assert that equality implies
458 // hash equality (DenseMap demands this as an invariant).
459 bool Result = isEqualImpl(LHS, RHS);
460 assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
462 return Result;
463}
464
465//===----------------------------------------------------------------------===//
466// CallValue
467//===----------------------------------------------------------------------===//
468
469namespace {
470
471/// Struct representing the available call values in the scoped hash
472/// table.
473struct CallValue {
474 Instruction *Inst;
475
476 CallValue(Instruction *I) : Inst(I) {
477 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
478 }
479
480 bool isSentinel() const {
481 return Inst == DenseMapInfo<Instruction *>::getEmptyKey();
482 }
483
484 static bool canHandle(Instruction *Inst) {
485 CallInst *CI = dyn_cast<CallInst>(Inst);
486 if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) ||
487 // FIXME: Currently the calls which may access the thread id may
488 // be considered as not accessing the memory. But this is
489 // problematic for coroutines, since coroutines may resume in a
490 // different thread. So we disable the optimization here for the
491 // correctness. However, it may block many other correct
492 // optimizations. Revert this one when we detect the memory
493 // accessing kind more precisely.
495 return false;
496 return true;
497 }
498};
499
500} // end anonymous namespace
501
502template <> struct llvm::DenseMapInfo<CallValue> {
503 static inline CallValue getEmptyKey() {
505 }
506
507 static unsigned getHashValue(CallValue Val);
508 static bool isEqual(CallValue LHS, CallValue RHS);
509};
510
511unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
512 Instruction *Inst = Val.Inst;
513
514 // Hash all of the operands as pointers and mix in the opcode.
515 return hashCallInst(cast<CallInst>(Inst));
516}
517
518bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
519 if (LHS.isSentinel() || RHS.isSentinel())
520 return LHS.Inst == RHS.Inst;
521
522 CallInst *LHSI = cast<CallInst>(LHS.Inst);
523 CallInst *RHSI = cast<CallInst>(RHS.Inst);
524
525 // Convergent calls implicitly depend on the set of threads that is
526 // currently executing, so conservatively return false if they are in
527 // different basic blocks.
528 if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
529 return false;
530
531 return LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true);
532}
533
534//===----------------------------------------------------------------------===//
535// GEPValue
536//===----------------------------------------------------------------------===//
537
538namespace {
539
540struct GEPValue {
541 Instruction *Inst;
542 std::optional<int64_t> ConstantOffset;
543
544 GEPValue(Instruction *I) : Inst(I) {
545 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
546 }
547
548 GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
549 : Inst(I), ConstantOffset(ConstantOffset) {
550 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
551 }
552
553 bool isSentinel() const {
554 return Inst == DenseMapInfo<Instruction *>::getEmptyKey();
555 }
556
557 static bool canHandle(Instruction *Inst) {
558 return isa<GetElementPtrInst>(Inst);
559 }
560};
561
562} // namespace
563
564template <> struct llvm::DenseMapInfo<GEPValue> {
565 static inline GEPValue getEmptyKey() {
567 }
568
569 static unsigned getHashValue(const GEPValue &Val);
570 static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
571};
572
573unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
574 auto *GEP = cast<GetElementPtrInst>(Val.Inst);
575 if (Val.ConstantOffset.has_value())
576 return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
577 Val.ConstantOffset.value());
578 return hash_combine(GEP->getOpcode(),
579 hash_combine_range(GEP->operand_values()));
580}
581
582bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
583 if (LHS.isSentinel() || RHS.isSentinel())
584 return LHS.Inst == RHS.Inst;
585 auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
586 auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
587 if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
588 return false;
589 if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
590 return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
591 return LGEP->isIdenticalToWhenDefined(RGEP);
592}
593
594//===----------------------------------------------------------------------===//
595// EarlyCSE implementation
596//===----------------------------------------------------------------------===//
597
598namespace {
599
600/// A simple and fast domtree-based CSE pass.
601///
602/// This pass does a simple depth-first walk over the dominator tree,
603/// eliminating trivially redundant instructions and using instsimplify to
604/// canonicalize things as it goes. It is intended to be fast and catch obvious
605/// cases so that instcombine and other passes are more effective. It is
606/// expected that a later pass of GVN will catch the interesting/hard cases.
607class EarlyCSE {
608public:
609 const TargetLibraryInfo &TLI;
610 const TargetTransformInfo &TTI;
611 DominatorTree &DT;
612 AssumptionCache &AC;
613 const SimplifyQuery SQ;
614 MemorySSA *MSSA;
615 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
616
617 using AllocatorTy =
618 RecyclingAllocator<BumpPtrAllocator,
619 ScopedHashTableVal<SimpleValue, Value *>>;
620 using ScopedHTType =
621 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
622 AllocatorTy>;
623
624 /// A scoped hash table of the current values of all of our simple
625 /// scalar expressions.
626 ///
627 /// As we walk down the domtree, we look to see if instructions are in this:
628 /// if so, we replace them with what we find, otherwise we insert them so
629 /// that dominated values can succeed in their lookup.
630 ScopedHTType AvailableValues;
631
632 /// A scoped hash table of the current values of previously encountered
633 /// memory locations.
634 ///
635 /// This allows us to get efficient access to dominating loads or stores when
636 /// we have a fully redundant load. In addition to the most recent load, we
637 /// keep track of a generation count of the read, which is compared against
638 /// the current generation count. The current generation count is incremented
639 /// after every possibly writing memory operation, which ensures that we only
640 /// CSE loads with other loads that have no intervening store. Ordering
641 /// events (such as fences or atomic instructions) increment the generation
642 /// count as well; essentially, we model these as writes to all possible
643 /// locations. Note that atomic and/or volatile loads and stores can be
644 /// present the table; it is the responsibility of the consumer to inspect
645 /// the atomicity/volatility if needed.
646 struct LoadValue {
647 Instruction *DefInst = nullptr;
648 unsigned Generation = 0;
649 int MatchingId = -1;
650 bool IsAtomic = false;
651 bool IsLoad = false;
652
653 LoadValue() = default;
654 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
655 bool IsAtomic, bool IsLoad)
656 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
657 IsAtomic(IsAtomic), IsLoad(IsLoad) {}
658 };
659
660 using LoadMapAllocator =
661 RecyclingAllocator<BumpPtrAllocator,
662 ScopedHashTableVal<Value *, LoadValue>>;
663 using LoadHTType =
664 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
665 LoadMapAllocator>;
666
667 LoadHTType AvailableLoads;
668
669 // A scoped hash table mapping memory locations (represented as typed
670 // addresses) to generation numbers at which that memory location became
671 // (henceforth indefinitely) invariant.
672 using InvariantMapAllocator =
673 RecyclingAllocator<BumpPtrAllocator,
674 ScopedHashTableVal<MemoryLocation, unsigned>>;
675 using InvariantHTType =
676 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
677 InvariantMapAllocator>;
678 InvariantHTType AvailableInvariants;
679
680 /// A scoped hash table of the current values of read-only call
681 /// values.
682 ///
683 /// It uses the same generation count as loads.
684 using CallHTType =
685 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
686 CallHTType AvailableCalls;
687
688 using GEPMapAllocatorTy =
689 RecyclingAllocator<BumpPtrAllocator,
690 ScopedHashTableVal<GEPValue, Value *>>;
691 using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
692 GEPMapAllocatorTy>;
693 GEPHTType AvailableGEPs;
694
695 /// This is the current generation of the memory value.
696 unsigned CurrentGeneration = 0;
697
698 /// Set up the EarlyCSE runner for a particular function.
699 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
700 const TargetTransformInfo &TTI, DominatorTree &DT,
701 AssumptionCache &AC, MemorySSA *MSSA)
702 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
703 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
704
705 bool run();
706
707private:
708 unsigned ClobberCounter = 0;
709 // Almost a POD, but needs to call the constructors for the scoped hash
710 // tables so that a new scope gets pushed on. These are RAII so that the
711 // scope gets popped when the NodeScope is destroyed.
712 class NodeScope {
713 public:
714 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
715 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
716 GEPHTType &AvailableGEPs)
717 : Scope(AvailableValues), LoadScope(AvailableLoads),
718 InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
719 GEPScope(AvailableGEPs) {}
720 NodeScope(const NodeScope &) = delete;
721 NodeScope &operator=(const NodeScope &) = delete;
722
723 private:
725 LoadHTType::ScopeTy LoadScope;
726 InvariantHTType::ScopeTy InvariantScope;
727 CallHTType::ScopeTy CallScope;
728 GEPHTType::ScopeTy GEPScope;
729 };
730
731 // Contains all the needed information to create a stack for doing a depth
732 // first traversal of the tree. This includes scopes for values, loads, and
733 // calls as well as the generation. There is a child iterator so that the
734 // children do not need to be store separately.
735 class StackNode {
736 public:
737 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
738 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
739 GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
740 DomTreeNode::const_iterator child,
741 DomTreeNode::const_iterator end)
742 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
743 EndIter(end),
744 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
745 AvailableCalls, AvailableGEPs) {}
746 StackNode(const StackNode &) = delete;
747 StackNode &operator=(const StackNode &) = delete;
748
749 // Accessors.
750 unsigned currentGeneration() const { return CurrentGeneration; }
751 unsigned childGeneration() const { return ChildGeneration; }
752 void childGeneration(unsigned generation) { ChildGeneration = generation; }
753 DomTreeNode *node() { return Node; }
754 DomTreeNode::const_iterator childIter() const { return ChildIter; }
755
756 DomTreeNode *nextChild() {
757 DomTreeNode *child = *ChildIter;
758 ++ChildIter;
759 return child;
760 }
761
762 DomTreeNode::const_iterator end() const { return EndIter; }
763 bool isProcessed() const { return Processed; }
764 void process() { Processed = true; }
765
766 private:
767 unsigned CurrentGeneration;
768 unsigned ChildGeneration;
769 DomTreeNode *Node;
770 DomTreeNode::const_iterator ChildIter;
771 DomTreeNode::const_iterator EndIter;
772 NodeScope Scopes;
773 bool Processed = false;
774 };
775
776 /// Wrapper class to handle memory instructions, including loads,
777 /// stores and intrinsic loads and stores defined by the target.
778 class ParseMemoryInst {
779 public:
780 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
781 : Inst(Inst) {
782 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
783 IntrID = II->getIntrinsicID();
784 if (TTI.getTgtMemIntrinsic(II, Info))
785 return;
786 if (isHandledNonTargetIntrinsic(IntrID)) {
787 switch (IntrID) {
788 case Intrinsic::masked_load:
789 Info.PtrVal = Inst->getOperand(0);
790 Info.MatchingId = Intrinsic::masked_load;
791 Info.ReadMem = true;
792 Info.WriteMem = false;
793 Info.IsVolatile = false;
794 break;
795 case Intrinsic::masked_store:
796 Info.PtrVal = Inst->getOperand(1);
797 // Use the ID of masked load as the "matching id". This will
798 // prevent matching non-masked loads/stores with masked ones
799 // (which could be done), but at the moment, the code here
800 // does not support matching intrinsics with non-intrinsics,
801 // so keep the MatchingIds specific to masked instructions
802 // for now (TODO).
803 Info.MatchingId = Intrinsic::masked_load;
804 Info.ReadMem = false;
805 Info.WriteMem = true;
806 Info.IsVolatile = false;
807 break;
808 }
809 } else if (auto *MI = dyn_cast<MemSetInst>(Inst)) {
810 Info.PtrVal = MI->getDest();
811 Info.MatchingId = 0;
812 Info.ReadMem = false;
813 Info.WriteMem = true;
814 Info.IsVolatile = MI->isVolatile();
815 }
816 }
817 }
818
819 Instruction *get() { return Inst; }
820 const Instruction *get() const { return Inst; }
821
822 bool isLoad() const {
823 if (IntrID != 0)
824 return Info.ReadMem;
825 return isa<LoadInst>(Inst);
826 }
827
828 bool isStore() const {
829 if (IntrID != 0)
830 return Info.WriteMem;
831 return isa<StoreInst>(Inst);
832 }
833
834 bool isAtomic() const {
835 if (IntrID != 0)
836 return Info.Ordering != AtomicOrdering::NotAtomic;
837 return Inst->isAtomic();
838 }
839
840 bool isUnordered() const {
841 if (IntrID != 0)
842 return Info.isUnordered();
843
844 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
845 return LI->isUnordered();
846 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
847 return SI->isUnordered();
848 }
849 // Conservative answer
850 return !Inst->isAtomic();
851 }
852
853 bool isVolatile() const {
854 if (IntrID != 0)
855 return Info.IsVolatile;
856
857 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
858 return LI->isVolatile();
859 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
860 return SI->isVolatile();
861 }
862 // Conservative answer
863 return true;
864 }
865
866 bool isInvariantLoad() const {
867 if (auto *LI = dyn_cast<LoadInst>(Inst))
868 return LI->hasMetadata(LLVMContext::MD_invariant_load);
869 return false;
870 }
871
872 bool isValid() const { return getPointerOperand() != nullptr; }
873
874 // For regular (non-intrinsic) loads/stores, this is set to -1. For
875 // intrinsic loads/stores, the id is retrieved from the corresponding
876 // field in the MemIntrinsicInfo structure. That field contains
877 // non-negative values only.
878 int getMatchingId() const {
879 if (IntrID != 0)
880 return Info.MatchingId;
881 return -1;
882 }
883
884 Value *getPointerOperand() const {
885 if (IntrID != 0)
886 return Info.PtrVal;
887 return getLoadStorePointerOperand(Inst);
888 }
889
890 Type *getValueType() const {
891 // TODO: handle target-specific intrinsics.
892 return Inst->getAccessType();
893 }
894
895 bool mayReadFromMemory() const {
896 if (IntrID != 0)
897 return Info.ReadMem;
898 return Inst->mayReadFromMemory();
899 }
900
901 bool mayWriteToMemory() const {
902 if (IntrID != 0)
903 return Info.WriteMem;
904 return Inst->mayWriteToMemory();
905 }
906
907 private:
908 Intrinsic::ID IntrID = 0;
909 MemIntrinsicInfo Info;
910 Instruction *Inst;
911 };
912
913 // This function is to prevent accidentally passing a non-target
914 // intrinsic ID to TargetTransformInfo.
915 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
916 switch (ID) {
917 case Intrinsic::masked_load:
918 case Intrinsic::masked_store:
919 return true;
920 }
921 return false;
922 }
923 static bool isHandledNonTargetIntrinsic(const Value *V) {
924 if (auto *II = dyn_cast<IntrinsicInst>(V))
925 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
926 return false;
927 }
928
929 bool processNode(DomTreeNode *Node);
930
931 bool handleBranchCondition(Instruction *CondInst, const CondBrInst *BI,
932 const BasicBlock *BB, const BasicBlock *Pred);
933
934 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
935 unsigned CurrentGeneration);
936
937 bool overridingStores(const ParseMemoryInst &Earlier,
938 const ParseMemoryInst &Later);
939
940 Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
941 bool CanCreate) const {
942 // TODO: We could insert relevant casts on type mismatch.
943 // The load or the store's first operand.
944 Value *V;
945 if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
946 switch (II->getIntrinsicID()) {
947 case Intrinsic::masked_load:
948 V = II;
949 break;
950 case Intrinsic::masked_store:
951 V = II->getOperand(0);
952 break;
953 default:
954 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
955 CanCreate);
956 }
957 } else {
958 V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
959 }
960
961 return V->getType() == ExpectedType ? V : nullptr;
962 }
963
964 /// Return true if the instruction is known to only operate on memory
965 /// provably invariant in the given "generation".
966 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
967
968 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
969 Instruction *EarlierInst, Instruction *LaterInst);
970
971 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
972 const IntrinsicInst *Later) {
973 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
974 // Is Mask0 a submask of Mask1?
975 if (Mask0 == Mask1)
976 return true;
977 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
978 return false;
979 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
980 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
981 if (!Vec0 || !Vec1)
982 return false;
983 if (Vec0->getType() != Vec1->getType())
984 return false;
985 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
986 Constant *Elem0 = Vec0->getOperand(i);
987 Constant *Elem1 = Vec1->getOperand(i);
988 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
989 if (Int0 && Int0->isZero())
990 continue;
991 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
992 if (Int1 && !Int1->isZero())
993 continue;
994 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
995 return false;
996 if (Elem0 == Elem1)
997 continue;
998 return false;
999 }
1000 return true;
1001 };
1002 auto PtrOp = [](const IntrinsicInst *II) {
1003 if (II->getIntrinsicID() == Intrinsic::masked_load)
1004 return II->getOperand(0);
1005 if (II->getIntrinsicID() == Intrinsic::masked_store)
1006 return II->getOperand(1);
1007 llvm_unreachable("Unexpected IntrinsicInst");
1008 };
1009 auto MaskOp = [](const IntrinsicInst *II) {
1010 if (II->getIntrinsicID() == Intrinsic::masked_load)
1011 return II->getOperand(1);
1012 if (II->getIntrinsicID() == Intrinsic::masked_store)
1013 return II->getOperand(2);
1014 llvm_unreachable("Unexpected IntrinsicInst");
1015 };
1016 auto ThruOp = [](const IntrinsicInst *II) {
1017 if (II->getIntrinsicID() == Intrinsic::masked_load)
1018 return II->getOperand(2);
1019 llvm_unreachable("Unexpected IntrinsicInst");
1020 };
1021
1022 if (PtrOp(Earlier) != PtrOp(Later))
1023 return false;
1024
1025 Intrinsic::ID IDE = Earlier->getIntrinsicID();
1026 Intrinsic::ID IDL = Later->getIntrinsicID();
1027 // We could really use specific intrinsic classes for masked loads
1028 // and stores in IntrinsicInst.h.
1029 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
1030 // Trying to replace later masked load with the earlier one.
1031 // Check that the pointers are the same, and
1032 // - masks and pass-throughs are the same, or
1033 // - replacee's pass-through is "undef" and replacer's mask is a
1034 // super-set of the replacee's mask.
1035 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
1036 return true;
1037 if (!isa<UndefValue>(ThruOp(Later)))
1038 return false;
1039 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1040 }
1041 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
1042 // Trying to replace a load of a stored value with the store's value.
1043 // Check that the pointers are the same, and
1044 // - load's mask is a subset of store's mask, and
1045 // - load's pass-through is "undef".
1046 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
1047 return false;
1048 return isa<UndefValue>(ThruOp(Later));
1049 }
1050 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
1051 // Trying to remove a store of the loaded value.
1052 // Check that the pointers are the same, and
1053 // - store's mask is a subset of the load's mask.
1054 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1055 }
1056 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1057 // Trying to remove a dead store (earlier).
1058 // Check that the pointers are the same,
1059 // - the to-be-removed store's mask is a subset of the other store's
1060 // mask.
1061 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1062 }
1063 return false;
1064 }
1065
1066 void removeMSSA(Instruction &Inst) {
1067 if (!MSSA)
1068 return;
1069 if (VerifyMemorySSA)
1070 MSSA->verifyMemorySSA();
1071 // Removing a store here can leave MemorySSA in an unoptimized state by
1072 // creating MemoryPhis that have identical arguments and by creating
1073 // MemoryUses whose defining access is not an actual clobber. The phi case
1074 // is handled by MemorySSA when passing OptimizePhis = true to
1075 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1076 // by MemorySSA's getClobberingMemoryAccess.
1077 MSSAUpdater->removeMemoryAccess(&Inst, true);
1078 }
1079};
1080
1081} // end anonymous namespace
1082
1083/// Determine if the memory referenced by LaterInst is from the same heap
1084/// version as EarlierInst.
1085/// This is currently called in two scenarios:
1086///
1087/// load p
1088/// ...
1089/// load p
1090///
1091/// and
1092///
1093/// x = load p
1094/// ...
1095/// store x, p
1096///
1097/// in both cases we want to verify that there are no possible writes to the
1098/// memory referenced by p between the earlier and later instruction.
1099bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1100 unsigned LaterGeneration,
1101 Instruction *EarlierInst,
1102 Instruction *LaterInst) {
1103 // Check the simple memory generation tracking first.
1104 if (EarlierGeneration == LaterGeneration)
1105 return true;
1106
1107 if (!MSSA)
1108 return false;
1109
1110 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1111 // read/write memory, then we can safely return true here.
1112 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1113 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1114 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1115 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1116 // with the default optimization pipeline.
1117 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1118 if (!EarlierMA)
1119 return true;
1120 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1121 if (!LaterMA)
1122 return true;
1123
1124 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1125 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1126 // EarlierInst and LaterInst and neither can any other write that potentially
1127 // clobbers LaterInst.
1128 MemoryAccess *LaterDef;
1129 if (ClobberCounter < EarlyCSEMssaOptCap) {
1130 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1131 ClobberCounter++;
1132 } else
1133 LaterDef = LaterMA->getDefiningAccess();
1134
1135 return MSSA->dominates(LaterDef, EarlierMA);
1136}
1137
1138bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1139 // A location loaded from with an invariant_load is assumed to *never* change
1140 // within the visible scope of the compilation.
1141 if (auto *LI = dyn_cast<LoadInst>(I))
1142 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1143 return true;
1144
1145 auto MemLocOpt = MemoryLocation::getOrNone(I);
1146 if (!MemLocOpt)
1147 // "target" intrinsic forms of loads aren't currently known to
1148 // MemoryLocation::get. TODO
1149 return false;
1150 MemoryLocation MemLoc = *MemLocOpt;
1151 if (!AvailableInvariants.count(MemLoc))
1152 return false;
1153
1154 // Is the generation at which this became invariant older than the
1155 // current one?
1156 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1157}
1158
1159bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1160 const CondBrInst *BI, const BasicBlock *BB,
1161 const BasicBlock *Pred) {
1162 assert(BI->getCondition() == CondInst && "Wrong condition?");
1163 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1164 auto *TorF = (BI->getSuccessor(0) == BB)
1166 : ConstantInt::getFalse(BB->getContext());
1167 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1168 Value *&RHS) {
1169 if (Opcode == Instruction::And &&
1171 return true;
1172 else if (Opcode == Instruction::Or &&
1174 return true;
1175 return false;
1176 };
1177 // If the condition is AND operation, we can propagate its operands into the
1178 // true branch. If it is OR operation, we can propagate them into the false
1179 // branch.
1180 unsigned PropagateOpcode =
1181 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1182
1183 bool MadeChanges = false;
1184 SmallVector<Instruction *, 4> WorkList;
1185 SmallPtrSet<Instruction *, 4> Visited;
1186 WorkList.push_back(CondInst);
1187 while (!WorkList.empty()) {
1188 Instruction *Curr = WorkList.pop_back_val();
1189
1190 AvailableValues.insert(Curr, TorF);
1191 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1192 << Curr->getName() << "' as " << *TorF << " in "
1193 << BB->getName() << "\n");
1194 if (!DebugCounter::shouldExecute(CSECounter)) {
1195 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1196 } else {
1197 // Replace all dominated uses with the known value.
1198 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1199 BasicBlockEdge(Pred, BB))) {
1200 NumCSECVP += Count;
1201 MadeChanges = true;
1202 }
1203 }
1204
1205 Value *LHS, *RHS;
1206 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1207 for (auto *Op : { LHS, RHS })
1208 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1209 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1210 WorkList.push_back(OPI);
1211 }
1212
1213 return MadeChanges;
1214}
1215
1216Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1217 unsigned CurrentGeneration) {
1218 if (InVal.DefInst == nullptr)
1219 return nullptr;
1220 if (auto *MSI = dyn_cast<MemSetInst>(InVal.DefInst)) {
1221 if (!MemInst.isLoad() || MemInst.isVolatile() || !MemInst.isUnordered())
1222 return nullptr;
1223 if (MSI->isVolatile())
1224 return nullptr;
1225 auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
1226 if (!Val || !Val->isZero())
1227 return nullptr;
1228 auto Len = MSI->getLengthInBytes();
1229 if (!Len)
1230 return nullptr;
1231 Type *InstType = MemInst.getValueType();
1232 if (!InstType)
1233 return nullptr;
1234 TypeSize LoadSize = SQ.DL.getTypeStoreSize(InstType);
1235 if (LoadSize.isScalable() || Len->ult(LoadSize.getFixedValue()))
1236 return nullptr;
1237 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1238 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1239 MemInst.get()))
1240 return nullptr;
1241 return Constant::getNullValue(MemInst.getValueType());
1242 }
1243 if (InVal.MatchingId != MemInst.getMatchingId())
1244 return nullptr;
1245 // We don't yet handle removing loads with ordering of any kind.
1246 if (MemInst.isVolatile() || !MemInst.isUnordered())
1247 return nullptr;
1248 // We can't replace an atomic load with one which isn't also atomic.
1249 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1250 return nullptr;
1251 // The value V returned from this function is used differently depending
1252 // on whether MemInst is a load or a store. If it's a load, we will replace
1253 // MemInst with V, if it's a store, we will check if V is the same as the
1254 // available value.
1255 bool MemInstMatching = !MemInst.isLoad();
1256 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1257 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1258
1259 // For stores check the result values before checking memory generation
1260 // (otherwise isSameMemGeneration may crash).
1261 Value *Result =
1262 MemInst.isStore()
1263 ? getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/false)
1264 : nullptr;
1265 if (MemInst.isStore() && InVal.DefInst != Result)
1266 return nullptr;
1267
1268 // Deal with non-target memory intrinsics.
1269 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1270 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1271 if (OtherNTI != MatchingNTI)
1272 return nullptr;
1273 if (OtherNTI && MatchingNTI) {
1274 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1275 cast<IntrinsicInst>(MemInst.get())))
1276 return nullptr;
1277 }
1278
1279 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1280 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1281 MemInst.get()))
1282 return nullptr;
1283
1284 if (!Result)
1285 Result = getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/true);
1286 return Result;
1287}
1288
1289static void combineIRFlags(Instruction &From, Value *To) {
1290 if (auto *I = dyn_cast<Instruction>(To)) {
1291 // If I being poison triggers UB, there is no need to drop those
1292 // flags. Otherwise, only retain flags present on both I and Inst.
1293 // TODO: Currently some fast-math flags are not treated as
1294 // poison-generating even though they should. Until this is fixed,
1295 // always retain flags present on both I and Inst for floating point
1296 // instructions.
1297 if (isa<FPMathOperator>(I) ||
1298 (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1299 I->andIRFlags(&From);
1300 }
1301 if (isa<CallBase>(&From) && isa<CallBase>(To)) {
1302 // NB: Intersection of attrs between InVal.first and Inst is overly
1303 // conservative. Since we only CSE readonly functions that have the same
1304 // memory state, we can preserve (or possibly in some cases combine)
1305 // more attributes. Likewise this implies when checking equality of
1306 // callsite for CSEing, we can probably ignore more attributes.
1307 // Generally poison generating attributes need to be handled with more
1308 // care as they can create *new* UB if preserved/combined and violated.
1309 // Attributes that imply immediate UB on the other hand would have been
1310 // violated either way.
1311 bool Success =
1312 cast<CallBase>(To)->tryIntersectAttributes(cast<CallBase>(&From));
1313 assert(Success && "Failed to intersect attributes in callsites that "
1314 "passed identical check");
1315 // For NDEBUG Compile.
1316 (void)Success;
1317 }
1318}
1319
1320bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1321 const ParseMemoryInst &Later) {
1322 // Can we remove Earlier store because of Later store?
1323
1324 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1325 "Violated invariant");
1326 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1327 return false;
1328 if (!Earlier.getValueType() || !Later.getValueType() ||
1329 Earlier.getValueType() != Later.getValueType())
1330 return false;
1331 if (Earlier.getMatchingId() != Later.getMatchingId())
1332 return false;
1333 // At the moment, we don't remove ordered stores, but do remove
1334 // unordered atomic stores. There's no special requirement (for
1335 // unordered atomics) about removing atomic stores only in favor of
1336 // other atomic stores since we were going to execute the non-atomic
1337 // one anyway and the atomic one might never have become visible.
1338 if (!Earlier.isUnordered() || !Later.isUnordered())
1339 return false;
1340
1341 // Deal with non-target memory intrinsics.
1342 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1343 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1344 if (ENTI && LNTI)
1345 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1346 cast<IntrinsicInst>(Later.get()));
1347
1348 // Because of the check above, at least one of them is false.
1349 // For now disallow matching intrinsics with non-intrinsics,
1350 // so assume that the stores match if neither is an intrinsic.
1351 return ENTI == LNTI;
1352}
1353
1354bool EarlyCSE::processNode(DomTreeNode *Node) {
1355 bool Changed = false;
1356 BasicBlock *BB = Node->getBlock();
1357
1358 // If this block has a single predecessor, then the predecessor is the parent
1359 // of the domtree node and all of the live out memory values are still current
1360 // in this block. If this block has multiple predecessors, then they could
1361 // have invalidated the live-out memory values of our parent value. For now,
1362 // just be conservative and invalidate memory if this block has multiple
1363 // predecessors.
1364 if (!BB->getSinglePredecessor())
1365 ++CurrentGeneration;
1366
1367 // If this node has a single predecessor which ends in a conditional branch,
1368 // we can infer the value of the branch condition given that we took this
1369 // path. We need the single predecessor to ensure there's not another path
1370 // which reaches this block where the condition might hold a different
1371 // value. Since we're adding this to the scoped hash table (like any other
1372 // def), it will have been popped if we encounter a future merge block.
1373 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1374 if (auto *BI = dyn_cast<CondBrInst>(Pred->getTerminator())) {
1375 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1376 if (CondInst && SimpleValue::canHandle(CondInst))
1377 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1378 }
1379 }
1380
1381 /// LastStore - Keep track of the last non-volatile store that we saw... for
1382 /// as long as there in no instruction that reads memory. If we see a store
1383 /// to the same location, we delete the dead store. This zaps trivial dead
1384 /// stores which can occur in bitfield code among other things.
1385 Instruction *LastStore = nullptr;
1386
1387 // See if any instructions in the block can be eliminated. If so, do it. If
1388 // not, add them to AvailableValues.
1389 for (Instruction &Inst : make_early_inc_range(*BB)) {
1390 // Dead instructions should just be removed.
1391 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1392 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1393 if (!DebugCounter::shouldExecute(CSECounter)) {
1394 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1395 continue;
1396 }
1397
1398 salvageKnowledge(&Inst, &AC);
1399 salvageDebugInfo(Inst);
1400 removeMSSA(Inst);
1401 Inst.eraseFromParent();
1402 Changed = true;
1403 ++NumSimplify;
1404 continue;
1405 }
1406
1407 // Skip assume intrinsics, they don't really have side effects (although
1408 // they're marked as such to ensure preservation of control dependencies),
1409 // and this pass will not bother with its removal. However, we should mark
1410 // its condition as true for all dominated blocks.
1411 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1412 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1413 if (CondI && SimpleValue::canHandle(CondI)) {
1414 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1415 << '\n');
1416 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1417 } else
1418 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1419 continue;
1420 }
1421
1422 // Likewise, noalias intrinsics don't actually write.
1423 if (match(&Inst,
1425 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1426 << '\n');
1427 continue;
1428 }
1429
1430 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1432 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1433 continue;
1434 }
1435
1436 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1438 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1439 continue;
1440 }
1441
1442 // We can skip all invariant.start intrinsics since they only read memory,
1443 // and we can forward values across it. For invariant starts without
1444 // invariant ends, we can use the fact that the invariantness never ends to
1445 // start a scope in the current generaton which is true for all future
1446 // generations. Also, we dont need to consume the last store since the
1447 // semantics of invariant.start allow us to perform DSE of the last
1448 // store, if there was a store following invariant.start. Consider:
1449 //
1450 // store 30, i8* p
1451 // invariant.start(p)
1452 // store 40, i8* p
1453 // We can DSE the store to 30, since the store 40 to invariant location p
1454 // causes undefined behaviour.
1456 // If there are any uses, the scope might end.
1457 if (!Inst.use_empty())
1458 continue;
1459 MemoryLocation MemLoc =
1461 // Don't start a scope if we already have a better one pushed
1462 if (!AvailableInvariants.count(MemLoc))
1463 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1464 continue;
1465 }
1466
1467 if (isGuard(&Inst)) {
1468 if (auto *CondI =
1469 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1470 if (SimpleValue::canHandle(CondI)) {
1471 // Do we already know the actual value of this condition?
1472 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1473 // Is the condition known to be true?
1474 if (isa<ConstantInt>(KnownCond) &&
1475 cast<ConstantInt>(KnownCond)->isOne()) {
1477 << "EarlyCSE removing guard: " << Inst << '\n');
1478 salvageKnowledge(&Inst, &AC);
1479 removeMSSA(Inst);
1480 Inst.eraseFromParent();
1481 Changed = true;
1482 continue;
1483 } else
1484 // Use the known value if it wasn't true.
1485 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1486 }
1487 // The condition we're on guarding here is true for all dominated
1488 // locations.
1489 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1490 }
1491 }
1492
1493 // Guard intrinsics read all memory, but don't write any memory.
1494 // Accordingly, don't update the generation but consume the last store (to
1495 // avoid an incorrect DSE).
1496 LastStore = nullptr;
1497 continue;
1498 }
1499
1500 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1501 // its simpler value.
1502 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1503 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1504 << '\n');
1505 if (!DebugCounter::shouldExecute(CSECounter)) {
1506 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1507 } else {
1508 bool Killed = false;
1509 if (!Inst.use_empty()) {
1510 Inst.replaceAllUsesWith(V);
1511 Changed = true;
1512 }
1513 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1514 salvageKnowledge(&Inst, &AC);
1515 removeMSSA(Inst);
1516 Inst.eraseFromParent();
1517 Changed = true;
1518 Killed = true;
1519 }
1520 if (Changed)
1521 ++NumSimplify;
1522 if (Killed)
1523 continue;
1524 }
1525 }
1526
1527 // Make sure stores prior to a potential unwind are not removed, as the
1528 // caller may read the memory.
1529 if (Inst.mayThrow())
1530 LastStore = nullptr;
1531
1532 // If this is a simple instruction that we can value number, process it.
1533 if (SimpleValue::canHandle(&Inst)) {
1534 if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1535 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1536 "Unexpected ebStrict from SimpleValue::canHandle()");
1537 assert((!CI->getRoundingMode() ||
1538 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1539 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1540 }
1541 // See if the instruction has an available value. If so, use it.
1542 if (Value *V = AvailableValues.lookup(&Inst)) {
1543 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1544 << '\n');
1545 if (!DebugCounter::shouldExecute(CSECounter)) {
1546 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1547 continue;
1548 }
1549 combineIRFlags(Inst, V);
1550 Inst.replaceAllUsesWith(V);
1551 salvageKnowledge(&Inst, &AC);
1552 removeMSSA(Inst);
1553 Inst.eraseFromParent();
1554 Changed = true;
1555 ++NumCSE;
1556 continue;
1557 }
1558
1559 // Otherwise, just remember that this value is available.
1560 AvailableValues.insert(&Inst, &Inst);
1561 continue;
1562 }
1563
1564 ParseMemoryInst MemInst(&Inst, TTI);
1565 // If this is a non-volatile load, process it.
1566 if (MemInst.isValid() && MemInst.isLoad()) {
1567 // (conservatively) we can't peak past the ordering implied by this
1568 // operation, but we can add this load to our set of available values
1569 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1570 LastStore = nullptr;
1571 ++CurrentGeneration;
1572 }
1573
1574 if (MemInst.isInvariantLoad()) {
1575 // If we pass an invariant load, we know that memory location is
1576 // indefinitely constant from the moment of first dereferenceability.
1577 // We conservatively treat the invariant_load as that moment. If we
1578 // pass a invariant load after already establishing a scope, don't
1579 // restart it since we want to preserve the earliest point seen.
1580 auto MemLoc = MemoryLocation::get(&Inst);
1581 if (!AvailableInvariants.count(MemLoc))
1582 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1583 }
1584
1585 // If we have an available version of this load, and if it is the right
1586 // generation or the load is known to be from an invariant location,
1587 // replace this instruction.
1588 //
1589 // If either the dominating load or the current load are invariant, then
1590 // we can assume the current load loads the same value as the dominating
1591 // load.
1592 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1593 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1594 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1595 << " to: " << *InVal.DefInst << '\n');
1596 if (!DebugCounter::shouldExecute(CSECounter)) {
1597 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1598 continue;
1599 }
1600 if (InVal.IsLoad)
1601 if (auto *I = dyn_cast<Instruction>(Op))
1602 combineMetadataForCSE(I, &Inst, false);
1603 if (!Inst.use_empty())
1604 Inst.replaceAllUsesWith(Op);
1605 salvageKnowledge(&Inst, &AC);
1606 removeMSSA(Inst);
1607 Inst.eraseFromParent();
1608 Changed = true;
1609 ++NumCSELoad;
1610 continue;
1611 }
1612
1613 // Otherwise, remember that we have this instruction.
1614 AvailableLoads.insert(MemInst.getPointerOperand(),
1615 LoadValue(&Inst, CurrentGeneration,
1616 MemInst.getMatchingId(),
1617 MemInst.isAtomic(),
1618 MemInst.isLoad()));
1619 LastStore = nullptr;
1620 continue;
1621 }
1622
1623 // If this instruction may read from memory, forget LastStore. Load/store
1624 // intrinsics will indicate both a read and a write to memory. The target
1625 // may override this (e.g. so that a store intrinsic does not read from
1626 // memory, and thus will be treated the same as a regular store for
1627 // commoning purposes).
1628 if (Inst.mayReadFromMemory() &&
1629 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1630 LastStore = nullptr;
1631
1632 // If this is a read-only or write-only call, process it. Skip store
1633 // MemInsts, as they will be more precisely handled later on. Also skip
1634 // memsets, as DSE may be able to optimize them better by removing the
1635 // earlier rather than later store.
1636 if (CallValue::canHandle(&Inst) &&
1637 (!MemInst.isValid() || !MemInst.isStore()) && !isa<MemSetInst>(&Inst)) {
1638 // If we have an available version of this call, and if it is the right
1639 // generation, replace this instruction.
1640 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1641 if (InVal.first != nullptr &&
1642 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1643 &Inst) &&
1644 InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
1645 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1646 << " to: " << *InVal.first << '\n');
1647 if (!DebugCounter::shouldExecute(CSECounter)) {
1648 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1649 continue;
1650 }
1651 combineIRFlags(Inst, InVal.first);
1652 if (!Inst.use_empty())
1653 Inst.replaceAllUsesWith(InVal.first);
1654 salvageKnowledge(&Inst, &AC);
1655 removeMSSA(Inst);
1656 Inst.eraseFromParent();
1657 Changed = true;
1658 ++NumCSECall;
1659 continue;
1660 }
1661
1662 // Increase memory generation for writes. Do this before inserting
1663 // the call, so it has the generation after the write occurred.
1664 if (Inst.mayWriteToMemory())
1665 ++CurrentGeneration;
1666
1667 // Otherwise, remember that we have this instruction.
1668 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1669 continue;
1670 }
1671
1672 // Compare GEP instructions based on offset.
1673 if (GEPValue::canHandle(&Inst)) {
1674 auto *GEP = cast<GetElementPtrInst>(&Inst);
1675 APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1676 GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
1677 ? Offset.trySExtValue()
1678 : std::nullopt);
1679 if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1680 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
1681 << '\n');
1682 combineIRFlags(Inst, V);
1683 Inst.replaceAllUsesWith(V);
1684 salvageKnowledge(&Inst, &AC);
1685 removeMSSA(Inst);
1686 Inst.eraseFromParent();
1687 Changed = true;
1688 ++NumCSEGEP;
1689 continue;
1690 }
1691
1692 // Otherwise, just remember that we have this GEP.
1693 AvailableGEPs.insert(GEPVal, &Inst);
1694 continue;
1695 }
1696
1697 // A release fence requires that all stores complete before it, but does
1698 // not prevent the reordering of following loads 'before' the fence. As a
1699 // result, we don't need to consider it as writing to memory and don't need
1700 // to advance the generation. We do need to prevent DSE across the fence,
1701 // but that's handled above.
1702 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1703 if (FI->getOrdering() == AtomicOrdering::Release) {
1704 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1705 continue;
1706 }
1707
1708 // write back DSE - If we write back the same value we just loaded from
1709 // the same location and haven't passed any intervening writes or ordering
1710 // operations, we can remove the write. The primary benefit is in allowing
1711 // the available load table to remain valid and value forward past where
1712 // the store originally was.
1713 if (MemInst.isValid() && MemInst.isStore()) {
1714 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1715 if (InVal.DefInst &&
1716 InVal.DefInst ==
1717 getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1718 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1719 if (!DebugCounter::shouldExecute(CSECounter)) {
1720 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1721 continue;
1722 }
1723 salvageKnowledge(&Inst, &AC);
1724 removeMSSA(Inst);
1725 Inst.eraseFromParent();
1726 Changed = true;
1727 ++NumDSE;
1728 // We can avoid incrementing the generation count since we were able
1729 // to eliminate this store.
1730 continue;
1731 }
1732 }
1733
1734 // Okay, this isn't something we can CSE at all. Check to see if it is
1735 // something that could modify memory. If so, our available memory values
1736 // cannot be used so bump the generation count.
1737 if (Inst.mayWriteToMemory()) {
1738 ++CurrentGeneration;
1739
1740 if (MemInst.isValid() && MemInst.isStore()) {
1741 // We do a trivial form of DSE if there are two stores to the same
1742 // location with no intervening loads. Delete the earlier store.
1743 if (LastStore) {
1744 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1745 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1746 << " due to: " << Inst << '\n');
1747 if (!DebugCounter::shouldExecute(CSECounter)) {
1748 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1749 } else {
1750 salvageKnowledge(&Inst, &AC);
1751 removeMSSA(*LastStore);
1752 LastStore->eraseFromParent();
1753 Changed = true;
1754 ++NumDSE;
1755 LastStore = nullptr;
1756 }
1757 }
1758 // fallthrough - we can exploit information about this store
1759 }
1760
1761 // Okay, we just invalidated anything we knew about loaded values. Try
1762 // to salvage *something* by remembering that the stored value is a live
1763 // version of the pointer. It is safe to forward from volatile stores
1764 // to non-volatile loads, so we don't have to check for volatility of
1765 // the store.
1766 AvailableLoads.insert(MemInst.getPointerOperand(),
1767 LoadValue(&Inst, CurrentGeneration,
1768 MemInst.getMatchingId(),
1769 MemInst.isAtomic(),
1770 MemInst.isLoad()));
1771
1772 // Remember that this was the last unordered store we saw for DSE. We
1773 // don't yet handle DSE on ordered or volatile stores since we don't
1774 // have a good way to model the ordering requirement for following
1775 // passes once the store is removed. We could insert a fence, but
1776 // since fences are slightly stronger than stores in their ordering,
1777 // it's not clear this is a profitable transform. Another option would
1778 // be to merge the ordering with that of the post dominating store.
1779 if (MemInst.isUnordered() && !MemInst.isVolatile())
1780 LastStore = &Inst;
1781 else
1782 LastStore = nullptr;
1783 }
1784 }
1785 }
1786
1787 return Changed;
1788}
1789
1790bool EarlyCSE::run() {
1791 // Note, deque is being used here because there is significant performance
1792 // gains over vector when the container becomes very large due to the
1793 // specific access patterns. For more information see the mailing list
1794 // discussion on this:
1795 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1796 std::deque<StackNode *> nodesToProcess;
1797
1798 bool Changed = false;
1799
1800 // Process the root node.
1801 nodesToProcess.push_back(new StackNode(
1802 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1803 AvailableGEPs, CurrentGeneration, DT.getRootNode(),
1804 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1805
1806 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1807
1808 // Process the stack.
1809 while (!nodesToProcess.empty()) {
1810 // Grab the first item off the stack. Set the current generation, remove
1811 // the node from the stack, and process it.
1812 StackNode *NodeToProcess = nodesToProcess.back();
1813
1814 // Initialize class members.
1815 CurrentGeneration = NodeToProcess->currentGeneration();
1816
1817 // Check if the node needs to be processed.
1818 if (!NodeToProcess->isProcessed()) {
1819 // Process the node.
1820 Changed |= processNode(NodeToProcess->node());
1821 NodeToProcess->childGeneration(CurrentGeneration);
1822 NodeToProcess->process();
1823 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1824 // Push the next child onto the stack.
1825 DomTreeNode *child = NodeToProcess->nextChild();
1826 nodesToProcess.push_back(new StackNode(
1827 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1828 AvailableGEPs, NodeToProcess->childGeneration(), child,
1829 child->begin(), child->end()));
1830 } else {
1831 // It has been processed, and there are no more children to process,
1832 // so delete it and pop it off the stack.
1833 delete NodeToProcess;
1834 nodesToProcess.pop_back();
1835 }
1836 } // while (!nodes...)
1837
1838 return Changed;
1839}
1840
1843 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1844 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1845 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1846 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1847 auto *MSSA =
1848 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1849
1850 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1851
1852 if (!CSE.run())
1853 return PreservedAnalyses::all();
1854
1857 if (UseMemorySSA)
1859 return PA;
1860}
1861
1863 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1864 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1865 OS, MapClassName2PassName);
1866 OS << '<';
1867 if (UseMemorySSA)
1868 OS << "memssa";
1869 OS << '>';
1870}
1871
1872namespace {
1873
1874/// A simple and fast domtree-based CSE pass.
1875///
1876/// This pass does a simple depth-first walk over the dominator tree,
1877/// eliminating trivially redundant instructions and using instsimplify to
1878/// canonicalize things as it goes. It is intended to be fast and catch obvious
1879/// cases so that instcombine and other passes are more effective. It is
1880/// expected that a later pass of GVN will catch the interesting/hard cases.
1881template<bool UseMemorySSA>
1882class EarlyCSELegacyCommonPass : public FunctionPass {
1883public:
1884 static char ID;
1885
1886 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1887 if (UseMemorySSA)
1889 else
1891 }
1892
1893 bool runOnFunction(Function &F) override {
1894 if (skipFunction(F))
1895 return false;
1896
1897 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1898 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1899 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1900 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1901 auto *MSSA =
1902 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1903
1904 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1905
1906 return CSE.run();
1907 }
1908
1909 void getAnalysisUsage(AnalysisUsage &AU) const override {
1910 AU.addRequired<AssumptionCacheTracker>();
1911 AU.addRequired<DominatorTreeWrapperPass>();
1912 AU.addRequired<TargetLibraryInfoWrapperPass>();
1913 AU.addRequired<TargetTransformInfoWrapperPass>();
1914 if (UseMemorySSA) {
1915 AU.addRequired<AAResultsWrapperPass>();
1916 AU.addRequired<MemorySSAWrapperPass>();
1917 AU.addPreserved<MemorySSAWrapperPass>();
1918 }
1919 AU.addPreserved<GlobalsAAWrapperPass>();
1920 AU.addPreserved<AAResultsWrapperPass>();
1921 AU.setPreservesCFG();
1922 }
1923};
1924
1925} // end anonymous namespace
1926
1927using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1928
1929template<>
1930char EarlyCSELegacyPass::ID = 0;
1931
1932INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1933 false)
1938INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1939
1940using EarlyCSEMemSSALegacyPass =
1941 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1942
1943template<>
1944char EarlyCSEMemSSALegacyPass::ID = 0;
1945
1947 if (UseMemorySSA)
1948 return new EarlyCSEMemSSALegacyPass();
1949 else
1950 return new EarlyCSELegacyPass();
1951}
1952
1953INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1954 "Early CSE w/ MemorySSA", false, false)
1961INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1962 "Early CSE w/ MemorySSA", false, false)
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoad(int Opcode)
static bool isStore(int Opcode)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the BumpPtrAllocator interface.
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Optimize for code generation
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines DenseMapInfo traits for DenseMap.
static cl::opt< bool > EarlyCSEDebugHash("earlycse-debug-hash", cl::init(false), cl::Hidden, cl::desc("Perform extra assertion checking to verify that SimpleValue's hash " "function is well-behaved w.r.t. its isEqual predicate"))
static void combineIRFlags(Instruction &From, Value *To)
EarlyCSELegacyCommonPass< false > EarlyCSELegacyPass
early cse Early CSE w MemorySSA
static unsigned getHashValueImpl(SimpleValue Val)
Definition EarlyCSE.cpp:224
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS)
Definition EarlyCSE.cpp:345
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Value *&B, SelectPatternFlavor &Flavor)
Match a 'select' including an optional 'not's of the condition.
Definition EarlyCSE.cpp:166
static unsigned hashCallInst(CallInst *CI)
Definition EarlyCSE.cpp:213
static cl::opt< unsigned > EarlyCSEMssaOptCap("earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden, cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls."))
This file provides the interface for a simple, fast CSE pass.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This header defines various interfaces for pass management in LLVM.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Value * getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration, BatchAAResults &BAA, function_ref< MemorySSA *()> GetMSSA)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file contains some templates that are useful if you are working with the STL at all.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
bool isProcessed() const
unsigned currentGeneration() const
unsigned childGeneration() const
DomTreeNode::const_iterator end() const
void process()
DomTreeNode * nextChild()
DomTreeNode::const_iterator childIter() const
DomTreeNode * node()
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
bool onlyWritesMemory(unsigned OpNo) const
bool onlyReadsMemory(unsigned OpNo) const
bool isConvergent() const
Determine if the invoke is convergent.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI unsigned getIndexTypeSizeInBits(Type *Ty) const
The size in bits of the index used in GEP calculation for this type.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:579
static bool shouldExecute(CounterInfo &Counter)
iterator begin() const
iterator end() const
Analysis pass which computes a DominatorTree.
Definition Dominators.h:274
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:310
This instruction extracts a struct member or array element value from an aggregate value.
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool isPresplitCoroutine() const
Determine if the function is presplit coroutine.
Definition Function.h:547
Represents calls to the gc.relocate intrinsic.
This instruction inserts a struct field of array element value into an aggregate value.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1035
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:975
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
ScopedHashTableScope< SimpleValue, Value *, DenseMapInfo< SimpleValue >, AllocatorTy > ScopeTy
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Wrapper pass for TargetTransformInfo.
LLVM_ABI bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Value * getOperand(unsigned i) const
Definition User.h:207
iterator_range< value_op_iterator > operand_values()
Definition User.h:291
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
bool use_empty() const
Definition Value.h:346
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ ebStrict
This corresponds to "fpexcept.strict".
Definition FPEnv.h:42
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
Context & getContext() const
Definition BasicBlock.h:99
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
LLVM_ABI void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_UMIN
Signed minimum.
@ SPF_UMAX
Signed maximum.
@ SPF_SMAX
Unsigned minimum.
@ SPF_UNKNOWN
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3120
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
LLVM_ABI FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
LLVM_ABI void initializeEarlyCSELegacyPassPass(PassRegistry &)
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
unsigned Generation
static unsigned getHashValue(CallValue Val)
static bool isEqual(CallValue LHS, CallValue RHS)
static CallValue getEmptyKey()
Definition EarlyCSE.cpp:503
static bool isEqual(const GEPValue &LHS, const GEPValue &RHS)
static unsigned getHashValue(const GEPValue &Val)
static GEPValue getEmptyKey()
Definition EarlyCSE.cpp:565
static SimpleValue getEmptyKey()
Definition EarlyCSE.cpp:157
static unsigned getHashValue(SimpleValue Val)
static bool isEqual(SimpleValue LHS, SimpleValue RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
const DataLayout & DL