LLVM 23.0.0git
PeepholeOptimizer.cpp
Go to the documentation of this file.
1//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Perform peephole optimizations on the machine code:
10//
11// - Optimize Extensions
12//
13// Optimization of sign / zero extension instructions. It may be extended to
14// handle other instructions with similar properties.
15//
16// On some targets, some instructions, e.g. X86 sign / zero extension, may
17// leave the source value in the lower part of the result. This optimization
18// will replace some uses of the pre-extension value with uses of the
19// sub-register of the results.
20//
21// - Optimize Comparisons
22//
23// Optimization of comparison instructions. For instance, in this code:
24//
25// sub r1, 1
26// cmp r1, 0
27// bz L1
28//
29// If the "sub" instruction all ready sets (or could be modified to set) the
30// same flag that the "cmp" instruction sets and that "bz" uses, then we can
31// eliminate the "cmp" instruction.
32//
33// Another instance, in this code:
34//
35// sub r1, r3 | sub r1, imm
36// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
37// bge L1
38//
39// If the branch instruction can use flag from "sub", then we can replace
40// "sub" with "subs" and eliminate the "cmp" instruction.
41//
42// - Optimize Loads:
43//
44// Loads that can be folded into a later instruction. A load is foldable
45// if it loads to virtual registers and the virtual register defined has
46// a single use.
47//
48// - Optimize Copies and Bitcast (more generally, target specific copies):
49//
50// Rewrite copies and bitcasts to avoid cross register bank copies
51// when possible.
52// E.g., Consider the following example, where capital and lower
53// letters denote different register file:
54// b = copy A <-- cross-bank copy
55// C = copy b <-- cross-bank copy
56// =>
57// b = copy A <-- cross-bank copy
58// C = copy A <-- same-bank copy
59//
60// E.g., for bitcast:
61// b = bitcast A <-- cross-bank copy
62// C = bitcast b <-- cross-bank copy
63// =>
64// b = bitcast A <-- cross-bank copy
65// C = copy A <-- same-bank copy
66//===----------------------------------------------------------------------===//
67
69#include "llvm/ADT/DenseMap.h"
71#include "llvm/ADT/SmallSet.h"
73#include "llvm/ADT/Statistic.h"
89#include "llvm/MC/LaneBitmask.h"
90#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/Pass.h"
93#include "llvm/Support/Debug.h"
95#include <cassert>
96#include <cstdint>
97#include <utility>
98
99using namespace llvm;
102
103#define DEBUG_TYPE "peephole-opt"
104
105// Optimize Extensions
106static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
107 cl::desc("Aggressive extension optimization"));
108
109static cl::opt<bool>
110 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
111 cl::desc("Disable the peephole optimizer"));
112
113/// Specifiy whether or not the value tracking looks through
114/// complex instructions. When this is true, the value tracker
115/// bails on everything that is not a copy or a bitcast.
116static cl::opt<bool>
117 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
118 cl::desc("Disable advanced copy optimization"));
119
121 "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
122 cl::desc("Disable non-allocatable physical register copy optimization"));
123
124// Limit the number of PHI instructions to process
125// in PeepholeOptimizer::getNextSource.
127 RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
128 cl::desc("Limit the length of PHI chains to lookup"));
129
130// Limit the length of recurrence chain when evaluating the benefit of
131// commuting operands.
133 "recurrence-chain-limit", cl::Hidden, cl::init(3),
134 cl::desc("Maximum length of recurrence chain when evaluating the benefit "
135 "of commuting operands"));
136
137STATISTIC(NumReuse, "Number of extension results reused");
138STATISTIC(NumCmps, "Number of compares eliminated");
139STATISTIC(NumImmFold, "Number of move immediate folded");
140STATISTIC(NumLoadFold, "Number of loads folded");
141STATISTIC(NumSelects, "Number of selects optimized");
142STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
143STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
144STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
145
146namespace {
147
148class ValueTrackerResult;
149class RecurrenceInstr;
150
151/// Interface to query instructions amenable to copy rewriting.
152class Rewriter {
153protected:
154 MachineInstr &CopyLike;
155 int CurrentSrcIdx = 0; ///< The index of the source being rewritten.
156public:
157 Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
158 virtual ~Rewriter() = default;
159
160 /// Get the next rewritable source (SrcReg, SrcSubReg) and
161 /// the related value that it affects (DstReg, DstSubReg).
162 /// A source is considered rewritable if its register class and the
163 /// register class of the related DstReg may not be register
164 /// coalescer friendly. In other words, given a copy-like instruction
165 /// not all the arguments may be returned at rewritable source, since
166 /// some arguments are none to be register coalescer friendly.
167 ///
168 /// Each call of this method moves the current source to the next
169 /// rewritable source.
170 /// For instance, let CopyLike be the instruction to rewrite.
171 /// CopyLike has one definition and one source:
172 /// dst.dstSubIdx = CopyLike src.srcSubIdx.
173 ///
174 /// The first call will give the first rewritable source, i.e.,
175 /// the only source this instruction has:
176 /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
177 /// This source defines the whole definition, i.e.,
178 /// (DstReg, DstSubReg) = (dst, dstSubIdx).
179 ///
180 /// The second and subsequent calls will return false, as there is only one
181 /// rewritable source.
182 ///
183 /// \return True if a rewritable source has been found, false otherwise.
184 /// The output arguments are valid if and only if true is returned.
185 virtual bool getNextRewritableSource(RegSubRegPair &Src,
186 RegSubRegPair &Dst) = 0;
187
188 /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
189 /// \return True if the rewriting was possible, false otherwise.
190 virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
191};
192
193/// Rewriter for COPY instructions.
194class CopyRewriter : public Rewriter {
195public:
196 CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
197 assert(MI.isCopy() && "Expected copy instruction");
198 }
199 ~CopyRewriter() override = default;
200
201 bool getNextRewritableSource(RegSubRegPair &Src,
202 RegSubRegPair &Dst) override {
203 if (++CurrentSrcIdx > 1)
204 return false;
205
206 // The rewritable source is the argument.
207 const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
208 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
209 // What we track are the alternative sources of the definition.
210 const MachineOperand &MODef = CopyLike.getOperand(0);
211 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
212 return true;
213 }
214
215 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
216 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
217 MOSrc.setReg(NewReg);
218 MOSrc.setSubReg(NewSubReg);
219 return true;
220 }
221};
222
223/// Helper class to rewrite uncoalescable copy like instructions
224/// into new COPY (coalescable friendly) instructions.
225class UncoalescableRewriter : public Rewriter {
226 int NumDefs; ///< Number of defs in the bitcast.
227
228public:
229 UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
230 NumDefs = MI.getDesc().getNumDefs();
231 }
232
233 /// \see See Rewriter::getNextRewritableSource()
234 /// All such sources need to be considered rewritable in order to
235 /// rewrite a uncoalescable copy-like instruction. This method return
236 /// each definition that must be checked if rewritable.
237 bool getNextRewritableSource(RegSubRegPair &Src,
238 RegSubRegPair &Dst) override {
239 // Find the next non-dead definition and continue from there.
240 if (CurrentSrcIdx == NumDefs)
241 return false;
242
243 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
244 ++CurrentSrcIdx;
245 if (CurrentSrcIdx == NumDefs)
246 return false;
247 }
248
249 // What we track are the alternative sources of the definition.
250 Src = RegSubRegPair(0, 0);
251 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
252 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
253
254 CurrentSrcIdx++;
255 return true;
256 }
257
258 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
259 return false;
260 }
261};
262
263/// Specialized rewriter for INSERT_SUBREG instruction.
264class InsertSubregRewriter : public Rewriter {
265public:
266 InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
267 assert(MI.isInsertSubreg() && "Invalid instruction");
268 }
269
270 /// \see See Rewriter::getNextRewritableSource()
271 /// Here CopyLike has the following form:
272 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
273 /// Src1 has the same register class has dst, hence, there is
274 /// nothing to rewrite.
275 /// Src2.src2SubIdx, may not be register coalescer friendly.
276 /// Therefore, the first call to this method returns:
277 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
278 /// (DstReg, DstSubReg) = (dst, subIdx).
279 ///
280 /// Subsequence calls will return false.
281 bool getNextRewritableSource(RegSubRegPair &Src,
282 RegSubRegPair &Dst) override {
283 // If we already get the only source we can rewrite, return false.
284 if (CurrentSrcIdx == 2)
285 return false;
286 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
287 CurrentSrcIdx = 2;
288 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
289 Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
290 const MachineOperand &MODef = CopyLike.getOperand(0);
291
292 // We want to track something that is compatible with the
293 // partial definition.
294 if (MODef.getSubReg())
295 // Bail if we have to compose sub-register indices.
296 return false;
297 Dst = RegSubRegPair(MODef.getReg(),
298 (unsigned)CopyLike.getOperand(3).getImm());
299 return true;
300 }
301
302 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
303 if (CurrentSrcIdx != 2)
304 return false;
305 // We are rewriting the inserted reg.
306 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
307 MO.setReg(NewReg);
308 MO.setSubReg(NewSubReg);
309 return true;
310 }
311};
312
313/// Specialized rewriter for EXTRACT_SUBREG instruction.
314class ExtractSubregRewriter : public Rewriter {
315 const TargetInstrInfo &TII;
316
317public:
318 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
319 : Rewriter(MI), TII(TII) {
320 assert(MI.isExtractSubreg() && "Invalid instruction");
321 }
322
323 /// \see Rewriter::getNextRewritableSource()
324 /// Here CopyLike has the following form:
325 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
326 /// There is only one rewritable source: Src.subIdx,
327 /// which defines dst.dstSubIdx.
328 bool getNextRewritableSource(RegSubRegPair &Src,
329 RegSubRegPair &Dst) override {
330 // If we already get the only source we can rewrite, return false.
331 if (CurrentSrcIdx == 1)
332 return false;
333 // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
334 CurrentSrcIdx = 1;
335 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
336 // If we have to compose sub-register indices, bail out.
337 if (MOExtractedReg.getSubReg())
338 return false;
339
340 Src =
341 RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
342
343 // We want to track something that is compatible with the definition.
344 const MachineOperand &MODef = CopyLike.getOperand(0);
345 Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
346 return true;
347 }
348
349 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
350 // The only source we can rewrite is the input register.
351 if (CurrentSrcIdx != 1)
352 return false;
353
354 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
355
356 // If we find a source that does not require to extract something,
357 // rewrite the operation with a copy.
358 if (!NewSubReg) {
359 // Move the current index to an invalid position.
360 // We do not want another call to this method to be able
361 // to do any change.
362 CurrentSrcIdx = -1;
363 // Rewrite the operation as a COPY.
364 // Get rid of the sub-register index.
365 CopyLike.removeOperand(2);
366 // Morph the operation into a COPY.
367 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
368 return true;
369 }
370 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
371 return true;
372 }
373};
374
375/// Specialized rewriter for REG_SEQUENCE instruction.
376class RegSequenceRewriter : public Rewriter {
377public:
378 RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
379 assert(MI.isRegSequence() && "Invalid instruction");
380 CurrentSrcIdx = -1;
381 }
382
383 /// \see Rewriter::getNextRewritableSource()
384 /// Here CopyLike has the following form:
385 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
386 /// Each call will return a different source, walking all the available
387 /// source.
388 ///
389 /// The first call returns:
390 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
391 /// (DstReg, DstSubReg) = (dst, subIdx1).
392 ///
393 /// The second call returns:
394 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
395 /// (DstReg, DstSubReg) = (dst, subIdx2).
396 ///
397 /// And so on, until all the sources have been traversed, then
398 /// it returns false.
399 bool getNextRewritableSource(RegSubRegPair &Src,
400 RegSubRegPair &Dst) override {
401 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
402 CurrentSrcIdx += 2;
403 if (static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
404 return false;
405
406 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407 Src.Reg = MOInsertedReg.getReg();
408 Src.SubReg = MOInsertedReg.getSubReg();
409
410 // We want to track something that is compatible with the related
411 // partial definition.
412 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
413
414 const MachineOperand &MODef = CopyLike.getOperand(0);
415 Dst.Reg = MODef.getReg();
416 assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
417 return true;
418 }
419
420 bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
421 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
422 MO.setReg(NewReg);
423 MO.setSubReg(NewSubReg);
424 return true;
425 }
426};
427
428class PeepholeOptimizer : private MachineFunction::Delegate {
429 const TargetInstrInfo *TII = nullptr;
430 const TargetRegisterInfo *TRI = nullptr;
431 MachineRegisterInfo *MRI = nullptr;
432 MachineDominatorTree *DT = nullptr; // Machine dominator tree
433 MachineLoopInfo *MLI = nullptr;
434
435public:
436 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
437 : DT(DT), MLI(MLI) {}
438
439 bool run(MachineFunction &MF);
440 /// Track Def -> Use info used for rewriting copies.
441 using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
442
443 /// Sequence of instructions that formulate recurrence cycle.
444 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
445
446private:
447 bool optimizeCmpInstr(MachineInstr &MI, MachineFunction &MF,
448 SmallPtrSet<MachineInstr *, 16> &LocalMIs);
449 bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
450 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
451 bool optimizeSelect(MachineInstr &MI,
452 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
453 bool optimizeCondBranch(MachineInstr &MI);
454
455 bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
456 bool optimizeCoalescableCopy(MachineInstr &MI);
457 bool optimizeUncoalescableCopy(MachineInstr &MI,
458 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
459 bool optimizeRecurrence(MachineInstr &PHI);
460 bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
461 RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
462 bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
463 DenseMap<Register, MachineInstr *> &ImmDefMIs);
464 bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
465 DenseMap<Register, MachineInstr *> &ImmDefMIs,
466 bool &Deleted);
467
468 /// Finds recurrence cycles, but only ones that formulated around
469 /// a def operand and a use operand that are tied. If there is a use
470 /// operand commutable with the tied use operand, find recurrence cycle
471 /// along that operand as well.
472 bool findTargetRecurrence(Register Reg,
473 const SmallSet<Register, 2> &TargetReg,
474 RecurrenceCycle &RC);
475
476 /// If copy instruction \p MI is a virtual register copy or a copy of a
477 /// constant physical register to a virtual register, track it in the
478 /// set CopySrcMIs. If this virtual register was previously seen as a
479 /// copy, replace the uses of this copy with the previously seen copy's
480 /// destination register.
481 bool foldRedundantCopy(MachineInstr &MI);
482
483 /// Is the register \p Reg a non-allocatable physical register?
484 bool isNAPhysCopy(Register Reg);
485
486 /// If copy instruction \p MI is a non-allocatable virtual<->physical
487 /// register copy, track it in the \p NAPhysToVirtMIs map. If this
488 /// non-allocatable physical register was previously copied to a virtual
489 /// registered and hasn't been clobbered, the virt->phys copy can be
490 /// deleted.
491 bool
492 foldRedundantNAPhysCopy(MachineInstr &MI,
493 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
494
495 bool isLoadFoldable(MachineInstr &MI,
496 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
497
498 /// Try to fold the load defined by \p FoldReg into \p MI using
499 /// TII->optimizeLoadInstr. On success, updates \p LocalMIs, erases the old
500 /// instructions, and returns the replacement; returns nullptr otherwise.
501 MachineInstr *foldLoadInto(MachineFunction &MF, MachineInstr &MI,
502 Register FoldReg,
503 SmallPtrSet<MachineInstr *, 16> &LocalMIs);
504
505 /// Check whether \p MI is understood by the register coalescer
506 /// but may require some rewriting.
507 static bool isCoalescableCopy(const MachineInstr &MI) {
508 // SubregToRegs are not interesting, because they are already register
509 // coalescer friendly.
510 return MI.isCopy() ||
511 (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
512 MI.isExtractSubreg()));
513 }
514
515 /// Check whether \p MI is a copy like instruction that is
516 /// not recognized by the register coalescer.
517 static bool isUncoalescableCopy(const MachineInstr &MI) {
518 return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
519 MI.isInsertSubregLike() ||
520 MI.isExtractSubregLike()));
521 }
522
523 MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
524 RewriteMapTy &RewriteMap);
525
526 // Set of copies to virtual registers keyed by source register. Never
527 // holds any physreg which requires def tracking.
528 DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
529
530 // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
531 void MF_HandleInsertion(MachineInstr &MI) override {}
532
533 bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
534 if (!MI.isCopy())
535 return false;
536
537 Register SrcReg = MI.getOperand(1).getReg();
538 unsigned SrcSubReg = MI.getOperand(1).getSubReg();
539 if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
540 return false;
541
542 SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
543 return true;
544 }
545
546 // If a COPY instruction is to be deleted or changed, we should also remove
547 // it from CopySrcMIs.
548 void deleteChangedCopy(MachineInstr &MI) {
549 RegSubRegPair SrcPair;
550 if (!getCopySrc(MI, SrcPair))
551 return;
552
553 auto It = CopySrcMIs.find(SrcPair);
554 if (It != CopySrcMIs.end() && It->second == &MI)
555 CopySrcMIs.erase(It);
556 }
557
558 void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
559
560 void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
561 deleteChangedCopy(MI);
562 }
563};
564
565class PeepholeOptimizerLegacy : public MachineFunctionPass {
566public:
567 static char ID; // Pass identification
568
569 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {}
570
571 bool runOnMachineFunction(MachineFunction &MF) override;
572
573 void getAnalysisUsage(AnalysisUsage &AU) const override {
574 AU.setPreservesCFG();
576 AU.addRequired<MachineLoopInfoWrapperPass>();
577 AU.addPreserved<MachineLoopInfoWrapperPass>();
578 if (Aggressive) {
579 AU.addRequired<MachineDominatorTreeWrapperPass>();
580 AU.addPreserved<MachineDominatorTreeWrapperPass>();
581 }
582 }
583
584 MachineFunctionProperties getRequiredProperties() const override {
585 return MachineFunctionProperties().setIsSSA();
586 }
587};
588
589/// Helper class to hold instructions that are inside recurrence cycles.
590/// The recurrence cycle is formulated around 1) a def operand and its
591/// tied use operand, or 2) a def operand and a use operand that is commutable
592/// with another use operand which is tied to the def operand. In the latter
593/// case, index of the tied use operand and the commutable use operand are
594/// maintained with CommutePair.
595class RecurrenceInstr {
596public:
597 using IndexPair = std::pair<unsigned, unsigned>;
598
599 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
600 RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
601 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
602
603 MachineInstr *getMI() const { return MI; }
604 std::optional<IndexPair> getCommutePair() const { return CommutePair; }
605
606private:
607 MachineInstr *MI;
608 std::optional<IndexPair> CommutePair;
609};
610
611/// Helper class to hold a reply for ValueTracker queries.
612/// Contains the returned sources for a given search and the instructions
613/// where the sources were tracked from.
614class ValueTrackerResult {
615private:
616 /// Track all sources found by one ValueTracker query.
618
619 /// Instruction using the sources in 'RegSrcs'.
620 const MachineInstr *Inst = nullptr;
621
622public:
623 ValueTrackerResult() = default;
624
625 ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
626
627 bool isValid() const { return getNumSources() > 0; }
628
629 void setInst(const MachineInstr *I) { Inst = I; }
630 const MachineInstr *getInst() const { return Inst; }
631
632 void clear() {
633 RegSrcs.clear();
634 Inst = nullptr;
635 }
636
637 void addSource(Register SrcReg, unsigned SrcSubReg) {
638 RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
639 }
640
641 void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
642 assert(Idx < getNumSources() && "Reg pair source out of index");
643 RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
644 }
645
646 int getNumSources() const { return RegSrcs.size(); }
647
648 RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
649
650 Register getSrcReg(int Idx) const {
651 assert(Idx < getNumSources() && "Reg source out of index");
652 return RegSrcs[Idx].Reg;
653 }
654
655 unsigned getSrcSubReg(int Idx) const {
656 assert(Idx < getNumSources() && "SubReg source out of index");
657 return RegSrcs[Idx].SubReg;
658 }
659
660 bool operator==(const ValueTrackerResult &Other) const {
661 if (Other.getInst() != getInst())
662 return false;
663
664 if (Other.getNumSources() != getNumSources())
665 return false;
666
667 for (int i = 0, e = Other.getNumSources(); i != e; ++i)
668 if (Other.getSrcReg(i) != getSrcReg(i) ||
669 Other.getSrcSubReg(i) != getSrcSubReg(i))
670 return false;
671 return true;
672 }
673};
674
675/// Helper class to track the possible sources of a value defined by
676/// a (chain of) copy related instructions.
677/// Given a definition (instruction and definition index), this class
678/// follows the use-def chain to find successive suitable sources.
679/// The given source can be used to rewrite the definition into
680/// def = COPY src.
681///
682/// For instance, let us consider the following snippet:
683/// v0 =
684/// v2 = INSERT_SUBREG v1, v0, sub0
685/// def = COPY v2.sub0
686///
687/// Using a ValueTracker for def = COPY v2.sub0 will give the following
688/// suitable sources:
689/// v2.sub0 and v0.
690/// Then, def can be rewritten into def = COPY v0.
691class ValueTracker {
692private:
693 /// The current point into the use-def chain.
694 const MachineInstr *Def = nullptr;
695
696 /// The index of the definition in Def.
697 unsigned DefIdx = 0;
698
699 /// The sub register index of the definition.
700 unsigned DefSubReg;
701
702 /// The register where the value can be found.
703 Register Reg;
704
705 /// MachineRegisterInfo used to perform tracking.
706 const MachineRegisterInfo &MRI;
707
708 /// Optional TargetInstrInfo used to perform some complex tracking.
709 const TargetInstrInfo *TII;
710
711 /// Dispatcher to the right underlying implementation of getNextSource.
712 ValueTrackerResult getNextSourceImpl();
713
714 /// Specialized version of getNextSource for Copy instructions.
715 ValueTrackerResult getNextSourceFromCopy();
716
717 /// Specialized version of getNextSource for Bitcast instructions.
718 ValueTrackerResult getNextSourceFromBitcast();
719
720 /// Specialized version of getNextSource for RegSequence instructions.
721 ValueTrackerResult getNextSourceFromRegSequence();
722
723 /// Specialized version of getNextSource for InsertSubreg instructions.
724 ValueTrackerResult getNextSourceFromInsertSubreg();
725
726 /// Specialized version of getNextSource for ExtractSubreg instructions.
727 ValueTrackerResult getNextSourceFromExtractSubreg();
728
729 /// Specialized version of getNextSource for SubregToReg instructions.
730 ValueTrackerResult getNextSourceFromSubregToReg();
731
732 /// Specialized version of getNextSource for PHI instructions.
733 ValueTrackerResult getNextSourceFromPHI();
734
735public:
736 /// Create a ValueTracker instance for the value defined by \p Reg.
737 /// \p DefSubReg represents the sub register index the value tracker will
738 /// track. It does not need to match the sub register index used in the
739 /// definition of \p Reg.
740 /// If \p Reg is a physical register, a value tracker constructed with
741 /// this constructor will not find any alternative source.
742 /// Indeed, when \p Reg is a physical register that constructor does not
743 /// know which definition of \p Reg it should track.
744 /// Use the next constructor to track a physical register.
745 ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI,
746 const TargetInstrInfo *TII = nullptr)
747 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
748 if (!Reg.isPhysical()) {
749 Def = MRI.getVRegDef(Reg);
750 DefIdx = MRI.def_begin(Reg).getOperandNo();
751 }
752 }
753
754 /// Following the use-def chain, get the next available source
755 /// for the tracked value.
756 /// \return A ValueTrackerResult containing a set of registers
757 /// and sub registers with tracked values. A ValueTrackerResult with
758 /// an empty set of registers means no source was found.
759 ValueTrackerResult getNextSource();
760};
761
762} // end anonymous namespace
763
764char PeepholeOptimizerLegacy::ID = 0;
765
766char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;
767
768INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
769 "Peephole Optimizations", false, false)
772INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
773 "Peephole Optimizations", false, false)
774
775/// If instruction is a copy-like instruction, i.e. it reads a single register
776/// and writes a single register and it does not modify the source, and if the
777/// source value is preserved as a sub-register of the result, then replace all
778/// reachable uses of the source with the subreg of the result.
779///
780/// Do not generate an EXTRACT that is used only in a debug use, as this changes
781/// the code. Since this code does not currently share EXTRACTs, just ignore all
782/// debug uses.
783bool PeepholeOptimizer::optimizeExtInstr(
785 SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
786 Register SrcReg, DstReg;
787 unsigned SubIdx;
788 if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
789 return false;
790
791 if (DstReg.isPhysical() || SrcReg.isPhysical())
792 return false;
793
794 if (MRI->hasOneNonDBGUse(SrcReg))
795 // No other uses.
796 return false;
797
798 // Ensure DstReg can get a register class that actually supports
799 // sub-registers. Don't change the class until we commit.
800 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
801 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
802 if (!DstRC)
803 return false;
804
805 // The ext instr may be operating on a sub-register of SrcReg as well.
806 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
807 // register.
808 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
809 // SrcReg:SubIdx should be replaced.
810 bool UseSrcSubIdx =
811 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
812
813 // The source has other uses. See if we can replace the other uses with use of
814 // the result of the extension.
816 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
817 ReachedBBs.insert(UI.getParent());
818
819 // Uses that are in the same BB of uses of the result of the instruction.
821
822 // Uses that the result of the instruction can reach.
824
825 bool ExtendLife = true;
826 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
827 MachineInstr *UseMI = UseMO.getParent();
828 if (UseMI == &MI)
829 continue;
830
831 if (UseMI->isPHI()) {
832 ExtendLife = false;
833 continue;
834 }
835
836 // Only accept uses of SrcReg:SubIdx.
837 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
838 continue;
839
840 // It's an error to translate this:
841 //
842 // %reg1025 = <sext> %reg1024
843 // ...
844 // %reg1026 = SUBREG_TO_REG %reg1024, 4
845 //
846 // into this:
847 //
848 // %reg1025 = <sext> %reg1024
849 // ...
850 // %reg1027 = COPY %reg1025:4
851 // %reg1026 = SUBREG_TO_REG %reg1027, 4
852 //
853 // The problem here is that SUBREG_TO_REG is there to assert that an
854 // implicit zext occurs. It doesn't insert a zext instruction. If we allow
855 // the COPY here, it will give us the value after the <sext>, not the
856 // original value of %reg1024 before <sext>.
857 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
858 continue;
859
860 MachineBasicBlock *UseMBB = UseMI->getParent();
861 if (UseMBB == &MBB) {
862 // Local uses that come after the extension.
863 if (!LocalMIs.count(UseMI))
864 Uses.push_back(&UseMO);
865 } else if (ReachedBBs.count(UseMBB)) {
866 // Non-local uses where the result of the extension is used. Always
867 // replace these unless it's a PHI.
868 Uses.push_back(&UseMO);
869 } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
870 // We may want to extend the live range of the extension result in order
871 // to replace these uses.
872 ExtendedUses.push_back(&UseMO);
873 } else {
874 // Both will be live out of the def MBB anyway. Don't extend live range of
875 // the extension result.
876 ExtendLife = false;
877 break;
878 }
879 }
880
881 if (ExtendLife && !ExtendedUses.empty())
882 // Extend the liveness of the extension result.
883 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
884
885 // Now replace all uses.
886 bool Changed = false;
887 if (!Uses.empty()) {
888 SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
889
890 // Look for PHI uses of the extended result, we don't want to extend the
891 // liveness of a PHI input. It breaks all kinds of assumptions down
892 // stream. A PHI use is expected to be the kill of its source values.
893 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
894 if (UI.isPHI())
895 PHIBBs.insert(UI.getParent());
896
897 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
898 for (MachineOperand *UseMO : Uses) {
899 MachineInstr *UseMI = UseMO->getParent();
900 MachineBasicBlock *UseMBB = UseMI->getParent();
901 if (PHIBBs.count(UseMBB))
902 continue;
903
904 // About to add uses of DstReg, clear DstReg's kill flags.
905 if (!Changed) {
906 MRI->clearKillFlags(DstReg);
907 MRI->constrainRegClass(DstReg, DstRC);
908 }
909
910 // SubReg defs are illegal in machine SSA phase,
911 // we should not generate SubReg defs.
912 //
913 // For example, for the instructions:
914 //
915 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
916 // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
917 //
918 // We should generate:
919 //
920 // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
921 // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
922 // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
923 //
924 if (UseSrcSubIdx)
925 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
926
927 Register NewVR = MRI->createVirtualRegister(RC);
928 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
929 TII->get(TargetOpcode::COPY), NewVR)
930 .addReg(DstReg, {}, SubIdx);
931 if (UseSrcSubIdx)
932 UseMO->setSubReg(0);
933
934 UseMO->setReg(NewVR);
935 ++NumReuse;
936 Changed = true;
937 }
938 }
939
940 return Changed;
941}
942
943/// If the instruction is a compare and the previous instruction it's comparing
944/// against already sets (or could be modified to set) the same flag as the
945/// compare, then we can remove the comparison and use the flag from the
946/// previous instruction.
947bool PeepholeOptimizer::optimizeCmpInstr(
950 // If this instruction is a comparison against zero and isn't comparing a
951 // physical register, we can try to optimize it.
952 Register SrcReg, SrcReg2;
953 int64_t CmpMask, CmpValue;
954 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
955 SrcReg.isPhysical() || SrcReg2.isPhysical())
956 return false;
957
958 // Attempt to optimize the comparison instruction.
959 LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
960 if (!TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI))
961 return false;
962
963 LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
964 ++NumCmps;
965
966 // The eliminated compare may have been the extra use preventing a
967 // load from being folded into the flag-setting instruction.
968 if (SrcReg.isVirtual() && MRI->hasOneNonDBGUser(SrcReg)) {
969 MachineInstr *FlagProducer = MRI->use_nodbg_begin(SrcReg)->getParent();
970 MachineInstr *LoadMI = MRI->getVRegDef(SrcReg);
971 // No store between LoadMI and FlagProducer that could change the value.
972 if (LocalMIs.count(FlagProducer) && LoadMI && LoadMI->canFoldAsLoad() &&
973 LoadMI->mayLoad() && LocalMIs.count(LoadMI) &&
975 make_range(std::next(LoadMI->getIterator()),
976 FlagProducer->getIterator()),
977 [](const MachineInstr &I) { return I.isLoadFoldBarrier(); }))
978 foldLoadInto(MF, *FlagProducer, SrcReg, LocalMIs);
979 }
980
981 return true;
982}
983
984/// Optimize a select instruction.
985bool PeepholeOptimizer::optimizeSelect(
986 MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
987 assert(MI.isSelect() && "Should only be called when MI->isSelect() is true");
988 if (!TII->optimizeSelect(MI, LocalMIs))
989 return false;
990 LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
991 MI.eraseFromParent();
992 ++NumSelects;
993 return true;
994}
995
996/// Check if a simpler conditional branch can be generated.
997bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
998 return TII->optimizeCondBranch(MI);
999}
1000
1001/// Try to find a better source value that shares the same register file to
1002/// replace \p RegSubReg in an instruction like
1003/// `DefRC.DefSubReg = COPY RegSubReg`
1004///
1005/// When true is returned, the \p RewriteMap can be used by the client to
1006/// retrieve all Def -> Use along the way up to the next source. Any found
1007/// Use that is not itself a key for another entry, is the next source to
1008/// use. During the search for the next source, multiple sources can be found
1009/// given multiple incoming sources of a PHI instruction. In this case, we
1010/// look in each PHI source for the next source; all found next sources must
1011/// share the same register file as \p Reg and \p SubReg. The client should
1012/// then be capable to rewrite all intermediate PHIs to get the next source.
1013/// \return False if no alternative sources are available. True otherwise.
1014bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
1015 unsigned DefSubReg,
1016 RegSubRegPair RegSubReg,
1017 RewriteMapTy &RewriteMap) {
1018 // Do not try to find a new source for a physical register.
1019 // So far we do not have any motivating example for doing that.
1020 // Thus, instead of maintaining untested code, we will revisit that if
1021 // that changes at some point.
1022 Register Reg = RegSubReg.Reg;
1023 RegSubRegPair CurSrcPair = RegSubReg;
1024 SmallVector<RegSubRegPair, 4> SrcToLook = {CurSrcPair};
1025
1026 unsigned PHICount = 0;
1027 do {
1028 CurSrcPair = SrcToLook.pop_back_val();
1029 // As explained above, do not handle physical registers
1030 if (CurSrcPair.Reg.isPhysical())
1031 return false;
1032
1033 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
1034
1035 // Follow the chain of copies until we find a more suitable source, a phi
1036 // or have to abort.
1037 while (true) {
1038 ValueTrackerResult Res = ValTracker.getNextSource();
1039 // Abort at the end of a chain (without finding a suitable source).
1040 if (!Res.isValid())
1041 return false;
1042
1043 // Insert the Def -> Use entry for the recently found source.
1044 auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1045
1046 if (!WasInserted) {
1047 const ValueTrackerResult &CurSrcRes = InsertPt->second;
1048
1049 assert(CurSrcRes == Res && "ValueTrackerResult found must match");
1050 // An existent entry with multiple sources is a PHI cycle we must avoid.
1051 // Otherwise it's an entry with a valid next source we already found.
1052 if (CurSrcRes.getNumSources() > 1) {
1054 << "findNextSource: found PHI cycle, aborting...\n");
1055 return false;
1056 }
1057 break;
1058 }
1059
1060 // ValueTrackerResult usually have one source unless it's the result from
1061 // a PHI instruction. Add the found PHI edges to be looked up further.
1062 unsigned NumSrcs = Res.getNumSources();
1063 if (NumSrcs > 1) {
1064 PHICount++;
1065 if (PHICount >= RewritePHILimit) {
1066 LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
1067 return false;
1068 }
1069
1070 for (unsigned i = 0; i < NumSrcs; ++i)
1071 SrcToLook.push_back(Res.getSrc(i));
1072 break;
1073 }
1074
1075 CurSrcPair = Res.getSrc(0);
1076 // Do not extend the live-ranges of physical registers as they add
1077 // constraints to the register allocator. Moreover, if we want to extend
1078 // the live-range of a physical register, unlike SSA virtual register,
1079 // we will have to check that they aren't redefine before the related use.
1080 if (CurSrcPair.Reg.isPhysical())
1081 return false;
1082
1083 // Keep following the chain if the value isn't any better yet.
1084 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
1085 if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1086 CurSrcPair.SubReg))
1087 continue;
1088
1089 // We currently cannot deal with subreg operands on PHI instructions
1090 // (see insertPHI()).
1091 if (PHICount > 0 && CurSrcPair.SubReg != 0)
1092 continue;
1093
1094 // We found a suitable source, and are done with this chain.
1095 break;
1096 }
1097 } while (!SrcToLook.empty());
1098
1099 // If we did not find a more suitable source, there is nothing to optimize.
1100 return CurSrcPair.Reg != Reg;
1101}
1102
1103/// Insert a PHI instruction with incoming edges \p SrcRegs that are
1104/// guaranteed to have the same register class. This is necessary whenever we
1105/// successfully traverse a PHI instruction and find suitable sources coming
1106/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
1107/// suitable to be used in a new COPY instruction.
1109 const TargetInstrInfo &TII,
1110 const SmallVectorImpl<RegSubRegPair> &SrcRegs,
1111 MachineInstr &OrigPHI) {
1112 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
1113
1114 const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
1115 // NewRC is only correct if no subregisters are involved. findNextSource()
1116 // should have rejected those cases already.
1117 assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
1118 Register NewVR = MRI.createVirtualRegister(NewRC);
1119 MachineBasicBlock *MBB = OrigPHI.getParent();
1120 MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
1121 TII.get(TargetOpcode::PHI), NewVR);
1122
1123 unsigned MBBOpIdx = 2;
1124 for (const RegSubRegPair &RegPair : SrcRegs) {
1125 MIB.addReg(RegPair.Reg, {}, RegPair.SubReg);
1126 MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
1127 // Since we're extended the lifetime of RegPair.Reg, clear the
1128 // kill flags to account for that and make RegPair.Reg reaches
1129 // the new PHI.
1130 MRI.clearKillFlags(RegPair.Reg);
1131 MBBOpIdx += 2;
1132 }
1133
1134 return *MIB;
1135}
1136
1137/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
1138/// the new source to use for rewrite. If \p HandleMultipleSources is true and
1139/// multiple sources for a given \p Def are found along the way, we found a
1140/// PHI instructions that needs to be rewritten.
1141/// TODO: HandleMultipleSources should be removed once we test PHI handling
1142/// with coalescable copies.
1143static RegSubRegPair
1145 RegSubRegPair Def,
1146 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1147 bool HandleMultipleSources = true) {
1148 RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1149 while (true) {
1150 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1151 // If there are no entries on the map, LookupSrc is the new source.
1152 if (!Res.isValid())
1153 return LookupSrc;
1154
1155 // There's only one source for this definition, keep searching...
1156 unsigned NumSrcs = Res.getNumSources();
1157 if (NumSrcs == 1) {
1158 LookupSrc.Reg = Res.getSrcReg(0);
1159 LookupSrc.SubReg = Res.getSrcSubReg(0);
1160 continue;
1161 }
1162
1163 // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1164 if (!HandleMultipleSources)
1165 break;
1166
1167 // Multiple sources, recurse into each source to find a new source
1168 // for it. Then, rewrite the PHI accordingly to its new edges.
1170 for (unsigned i = 0; i < NumSrcs; ++i) {
1171 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1172 NewPHISrcs.push_back(
1173 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1174 }
1175
1176 // Build the new PHI node and return its def register as the new source.
1177 MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1178 MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1179 LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1180 LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
1181 LLVM_DEBUG(dbgs() << " With: " << NewPHI);
1182 const MachineOperand &MODef = NewPHI.getOperand(0);
1183 return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1184 }
1185
1186 return RegSubRegPair(0, 0);
1187}
1188
1189bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
1190 bool Changed = false;
1191 // Get the right rewriter for the current copy.
1192 // Rewrite each rewritable source.
1193 RegSubRegPair Dst;
1194 RegSubRegPair TrackPair;
1195 while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
1196 if (Dst.Reg.isPhysical()) {
1197 // Do not try to find a new source for a physical register.
1198 // So far we do not have any motivating example for doing that.
1199 // Thus, instead of maintaining untested code, we will revisit that if
1200 // that changes at some point.
1201 continue;
1202 }
1203
1204 const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);
1205
1206 // Keep track of PHI nodes and its incoming edges when looking for sources.
1207 RewriteMapTy RewriteMap;
1208 // Try to find a more suitable source. If we failed to do so, or get the
1209 // actual source, move to the next source.
1210 if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
1211 continue;
1212
1213 // Get the new source to rewrite. TODO: Only enable handling of multiple
1214 // sources (PHIs) once we have a motivating example and testcases for it.
1215 RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
1216 /*HandleMultipleSources=*/false);
1217 assert(TrackPair.Reg != NewSrc.Reg &&
1218 "should not rewrite source to original value");
1219 if (!NewSrc.Reg)
1220 continue;
1221
1222 if (NewSrc.SubReg) {
1223 // Verify the register class supports the subregister index. ARM's
1224 // copy-like queries return register:subreg pairs where the register's
1225 // current class does not directly support the subregister index.
1226 const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg);
1227 const TargetRegisterClass *WithSubRC =
1228 TRI->getSubClassWithSubReg(RC, NewSrc.SubReg);
1229 if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
1230 continue;
1231 Changed = true;
1232 }
1233
1234 // Rewrite source.
1235 if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
1236 // We may have extended the live-range of NewSrc, account for that.
1237 MRI->clearKillFlags(NewSrc.Reg);
1238 Changed = true;
1239 }
1240 }
1241
1242 // TODO: We could have a clean-up method to tidy the instruction.
1243 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
1244 // => v0 = COPY v1
1245 // Currently we haven't seen motivating example for that and we
1246 // want to avoid untested code.
1247 NumRewrittenCopies += Changed;
1248 return Changed;
1249}
1250
1251/// Optimize generic copy instructions to avoid cross register bank copy.
1252/// The optimization looks through a chain of copies and tries to find a source
1253/// that has a compatible register class.
1254/// Two register classes are considered to be compatible if they share the same
1255/// register bank.
1256/// New copies issued by this optimization are register allocator
1257/// friendly. This optimization does not remove any copy as it may
1258/// overconstrain the register allocator, but replaces some operands
1259/// when possible.
1260/// \pre isCoalescableCopy(*MI) is true.
1261/// \return True, when \p MI has been rewritten. False otherwise.
1262bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1263 assert(isCoalescableCopy(MI) && "Invalid argument");
1264 assert(MI.getDesc().getNumDefs() == 1 &&
1265 "Coalescer can understand multiple defs?!");
1266 const MachineOperand &MODef = MI.getOperand(0);
1267 // Do not rewrite physical definitions.
1268 if (MODef.getReg().isPhysical())
1269 return false;
1270
1271 switch (MI.getOpcode()) {
1272 case TargetOpcode::COPY:
1273 return optimizeCoalescableCopyImpl(CopyRewriter(MI));
1274 case TargetOpcode::INSERT_SUBREG:
1275 return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
1276 case TargetOpcode::EXTRACT_SUBREG:
1277 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
1278 case TargetOpcode::REG_SEQUENCE:
1279 return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
1280 default:
1281 // Handle uncoalescable copy-like instructions.
1282 if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1283 MI.isExtractSubregLike())
1284 return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
1285 return false;
1286 }
1287}
1288
1289/// Rewrite the source found through \p Def, by using the \p RewriteMap
1290/// and create a new COPY instruction. More info about RewriteMap in
1291/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
1292/// Uncoalescable copies, since they are copy like instructions that aren't
1293/// recognized by the register allocator.
1294MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1295 RegSubRegPair Def,
1296 RewriteMapTy &RewriteMap) {
1297 assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
1298
1299 // Find the new source to use in the COPY rewrite.
1300 RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
1301
1302 // Insert the COPY.
1303 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1304 Register NewVReg = MRI->createVirtualRegister(DefRC);
1305
1306 if (NewSrc.SubReg) {
1307 const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg);
1308 const TargetRegisterClass *WithSubRC =
1309 TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg);
1310
1311 // The new source may not directly support the subregister, but we should be
1312 // able to assume it is constrainable to support the subregister (otherwise
1313 // ValueTracker was lying and reported a useless value).
1314 if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
1315 llvm_unreachable("replacement register cannot support subregister");
1316 }
1317
1318 MachineInstr *NewCopy =
1319 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
1320 TII->get(TargetOpcode::COPY), NewVReg)
1321 .addReg(NewSrc.Reg, {}, NewSrc.SubReg);
1322
1323 if (Def.SubReg) {
1324 NewCopy->getOperand(0).setSubReg(Def.SubReg);
1325 NewCopy->getOperand(0).setIsUndef();
1326 }
1327
1328 LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
1329 LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
1330 LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
1331 MRI->replaceRegWith(Def.Reg, NewVReg);
1332 MRI->clearKillFlags(NewVReg);
1333
1334 // We extended the lifetime of NewSrc.Reg, clear the kill flags to
1335 // account for that.
1336 MRI->clearKillFlags(NewSrc.Reg);
1337
1338 return *NewCopy;
1339}
1340
1341/// Optimize copy-like instructions to create
1342/// register coalescer friendly instruction.
1343/// The optimization tries to kill-off the \p MI by looking
1344/// through a chain of copies to find a source that has a compatible
1345/// register class.
1346/// If such a source is found, it replace \p MI by a generic COPY
1347/// operation.
1348/// \pre isUncoalescableCopy(*MI) is true.
1349/// \return True, when \p MI has been optimized. In that case, \p MI has
1350/// been removed from its parent.
1351/// All COPY instructions created, are inserted in \p LocalMIs.
1352bool PeepholeOptimizer::optimizeUncoalescableCopy(
1353 MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1354 assert(isUncoalescableCopy(MI) && "Invalid argument");
1355 UncoalescableRewriter CpyRewriter(MI);
1356
1357 // Rewrite each rewritable source by generating new COPYs. This works
1358 // differently from optimizeCoalescableCopy since it first makes sure that all
1359 // definitions can be rewritten.
1360 RewriteMapTy RewriteMap;
1361 RegSubRegPair Src;
1363 SmallVector<RegSubRegPair, 4> RewritePairs;
1364 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1365 // If a physical register is here, this is probably for a good reason.
1366 // Do not rewrite that.
1367 if (Def.Reg.isPhysical())
1368 return false;
1369
1370 // FIXME: Uncoalescable copies are treated differently by
1371 // UncoalescableRewriter, and this probably should not share
1372 // API. getNextRewritableSource really finds rewritable defs.
1373 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1374
1375 // If we do not know how to rewrite this definition, there is no point
1376 // in trying to kill this instruction.
1377 if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
1378 return false;
1379
1380 RewritePairs.push_back(Def);
1381 }
1382
1383 // The change is possible for all defs, do it.
1384 for (const RegSubRegPair &Def : RewritePairs) {
1385 // Rewrite the "copy" in a way the register coalescer understands.
1386 MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1387 LocalMIs.insert(&NewCopy);
1388 }
1389
1390 // MI is now dead.
1391 LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1392 MI.eraseFromParent();
1393 ++NumUncoalescableCopies;
1394 return true;
1395}
1396
1397/// Check whether MI is a candidate for folding into a later instruction.
1398/// We only fold loads to virtual registers and the virtual register defined
1399/// has a single user.
1400bool PeepholeOptimizer::isLoadFoldable(
1401 MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1402 if (!MI.canFoldAsLoad() || !MI.mayLoad())
1403 return false;
1404 const MCInstrDesc &MCID = MI.getDesc();
1405 if (MCID.getNumDefs() != 1)
1406 return false;
1407
1408 Register Reg = MI.getOperand(0).getReg();
1409 // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1410 // loads. It should be checked when processing uses of the load, since
1411 // uses can be removed during peephole.
1412 if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1413 MRI->hasOneNonDBGUser(Reg)) {
1414 FoldAsLoadDefCandidates.insert(Reg);
1415 return true;
1416 }
1417 return false;
1418}
1419
1420MachineInstr *
1421PeepholeOptimizer::foldLoadInto(MachineFunction &MF, MachineInstr &MI,
1422 Register FoldReg,
1423 SmallPtrSet<MachineInstr *, 16> &LocalMIs) {
1424 Register Reg = FoldReg;
1425 MachineInstr *DefMI = nullptr;
1426 MachineInstr *CopyMI = nullptr;
1427 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, Reg, DefMI, CopyMI);
1428 if (!FoldMI)
1429 return nullptr;
1430 LLVM_DEBUG(dbgs() << "Replacing: " << MI << " With: " << *FoldMI);
1431 LocalMIs.erase(&MI);
1432 LocalMIs.erase(DefMI);
1433 LocalMIs.insert(FoldMI);
1434 if (CopyMI)
1435 LocalMIs.insert(CopyMI);
1436 if (MI.shouldUpdateAdditionalCallInfo())
1437 MF.moveAdditionalCallInfo(&MI, FoldMI);
1438 MI.eraseFromParent();
1440 MRI->markUsesInDebugValueAsUndef(FoldReg);
1441 ++NumLoadFold;
1442 return FoldMI;
1443}
1444
1445bool PeepholeOptimizer::isMoveImmediate(
1446 MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1447 DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1448 const MCInstrDesc &MCID = MI.getDesc();
1449 if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
1450 return false;
1451 Register Reg = MI.getOperand(0).getReg();
1452 if (!Reg.isVirtual())
1453 return false;
1454
1455 int64_t ImmVal;
1456 if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
1457 return false;
1458
1459 ImmDefMIs.insert(std::make_pair(Reg, &MI));
1460 ImmDefRegs.insert(Reg);
1461 return true;
1462}
1463
1464/// Try folding register operands that are defined by move immediate
1465/// instructions, i.e. a trivial constant folding optimization, if
1466/// and only if the def and use are in the same BB.
1467bool PeepholeOptimizer::foldImmediate(
1468 MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1469 DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
1470 Deleted = false;
1471 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1472 MachineOperand &MO = MI.getOperand(i);
1473 if (!MO.isReg() || MO.isDef())
1474 continue;
1475 Register Reg = MO.getReg();
1476 if (!Reg.isVirtual())
1477 continue;
1478 if (ImmDefRegs.count(Reg) == 0)
1479 continue;
1480 auto II = ImmDefMIs.find(Reg);
1481 assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1482 if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
1483 ++NumImmFold;
1484 // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
1485 // is not deleted, and we happened to get a same MI, we can delete MI and
1486 // replace its users.
1487 if (MRI->getVRegDef(Reg) &&
1488 MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
1489 Register DstReg = MI.getOperand(0).getReg();
1490 if (DstReg.isVirtual() &&
1491 MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
1492 MRI->replaceRegWith(DstReg, Reg);
1493 MRI->clearKillFlags(Reg);
1494 MI.eraseFromParent();
1495 Deleted = true;
1496 }
1497 }
1498 return true;
1499 }
1500 }
1501 return false;
1502}
1503
1504// FIXME: This is very simple and misses some cases which should be handled when
1505// motivating examples are found.
1506//
1507// The copy rewriting logic should look at uses as well as defs and be able to
1508// eliminate copies across blocks.
1509//
1510// Later copies that are subregister extracts will also not be eliminated since
1511// only the first copy is considered.
1512//
1513// e.g.
1514// %1 = COPY %0
1515// %2 = COPY %0:sub1
1516//
1517// Should replace %2 uses with %1:sub1
1518bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
1519 assert(MI.isCopy() && "expected a COPY machine instruction");
1520
1521 RegSubRegPair SrcPair;
1522 if (!getCopySrc(MI, SrcPair))
1523 return false;
1524
1525 Register DstReg = MI.getOperand(0).getReg();
1526 if (!DstReg.isVirtual())
1527 return false;
1528
1529 if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1530 // First copy of this reg seen.
1531 return false;
1532 }
1533
1534 MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
1535
1536 assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1537 "Unexpected mismatching subreg!");
1538
1539 Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1540
1541 // Only replace if the copy register class is the same.
1542 //
1543 // TODO: If we have multiple copies to different register classes, we may want
1544 // to track multiple copies of the same source register.
1545 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1546 return false;
1547
1548 MRI->replaceRegWith(DstReg, PrevDstReg);
1549
1550 // Lifetime of the previous copy has been extended.
1551 MRI->clearKillFlags(PrevDstReg);
1552 return true;
1553}
1554
1555bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1556 return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1557}
1558
1559bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1560 MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1561 assert(MI.isCopy() && "expected a COPY machine instruction");
1562
1564 return false;
1565
1566 Register DstReg = MI.getOperand(0).getReg();
1567 Register SrcReg = MI.getOperand(1).getReg();
1568 if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1569 // %vreg = COPY $physreg
1570 // Avoid using a datastructure which can track multiple live non-allocatable
1571 // phys->virt copies since LLVM doesn't seem to do this.
1572 NAPhysToVirtMIs.insert({SrcReg, &MI});
1573 return false;
1574 }
1575
1576 if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1577 return false;
1578
1579 // $physreg = COPY %vreg
1580 auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1581 if (PrevCopy == NAPhysToVirtMIs.end()) {
1582 // We can't remove the copy: there was an intervening clobber of the
1583 // non-allocatable physical register after the copy to virtual.
1584 LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1585 << MI);
1586 return false;
1587 }
1588
1589 Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1590 if (PrevDstReg == SrcReg) {
1591 // Remove the virt->phys copy: we saw the virtual register definition, and
1592 // the non-allocatable physical register's state hasn't changed since then.
1593 LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1594 ++NumNAPhysCopies;
1595 return true;
1596 }
1597
1598 // Potential missed optimization opportunity: we saw a different virtual
1599 // register get a copy of the non-allocatable physical register, and we only
1600 // track one such copy. Avoid getting confused by this new non-allocatable
1601 // physical register definition, and remove it from the tracked copies.
1602 LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1603 NAPhysToVirtMIs.erase(PrevCopy);
1604 return false;
1605}
1606
1607/// \bried Returns true if \p MO is a virtual register operand.
1609 return MO.isReg() && MO.getReg().isVirtual();
1610}
1611
1612bool PeepholeOptimizer::findTargetRecurrence(
1613 Register Reg, const SmallSet<Register, 2> &TargetRegs,
1614 RecurrenceCycle &RC) {
1615 // Recurrence found if Reg is in TargetRegs.
1616 if (TargetRegs.count(Reg))
1617 return true;
1618
1619 // TODO: Curerntly, we only allow the last instruction of the recurrence
1620 // cycle (the instruction that feeds the PHI instruction) to have more than
1621 // one uses to guarantee that commuting operands does not tie registers
1622 // with overlapping live range. Once we have actual live range info of
1623 // each register, this constraint can be relaxed.
1624 if (!MRI->hasOneNonDBGUse(Reg))
1625 return false;
1626
1627 // Give up if the reccurrence chain length is longer than the limit.
1628 if (RC.size() >= MaxRecurrenceChain)
1629 return false;
1630
1631 MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
1632 unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
1633
1634 // Only interested in recurrences whose instructions have only one def, which
1635 // is a virtual register.
1636 if (MI.getDesc().getNumDefs() != 1)
1637 return false;
1638
1639 MachineOperand &DefOp = MI.getOperand(0);
1640 if (!isVirtualRegisterOperand(DefOp))
1641 return false;
1642
1643 // Check if def operand of MI is tied to any use operand. We are only
1644 // interested in the case that all the instructions in the recurrence chain
1645 // have there def operand tied with one of the use operand.
1646 unsigned TiedUseIdx;
1647 if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1648 return false;
1649
1650 if (Idx == TiedUseIdx) {
1651 RC.push_back(RecurrenceInstr(&MI));
1652 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1653 } else {
1654 // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
1655 unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
1656 if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1657 RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
1658 return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1659 }
1660 }
1661
1662 return false;
1663}
1664
1665/// Phi instructions will eventually be lowered to copy instructions.
1666/// If phi is in a loop header, a recurrence may formulated around the source
1667/// and destination of the phi. For such case commuting operands of the
1668/// instructions in the recurrence may enable coalescing of the copy instruction
1669/// generated from the phi. For example, if there is a recurrence of
1670///
1671/// LoopHeader:
1672/// %1 = phi(%0, %100)
1673/// LoopLatch:
1674/// %0<def, tied1> = ADD %2<def, tied0>, %1
1675///
1676/// , the fact that %0 and %2 are in the same tied operands set makes
1677/// the coalescing of copy instruction generated from the phi in
1678/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1679/// %2 have overlapping live range. This introduces additional move
1680/// instruction to the final assembly. However, if we commute %2 and
1681/// %1 of ADD instruction, the redundant move instruction can be
1682/// avoided.
1683bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1684 SmallSet<Register, 2> TargetRegs;
1685 for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1686 MachineOperand &MO = PHI.getOperand(Idx);
1687 assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1688 TargetRegs.insert(MO.getReg());
1689 }
1690
1691 bool Changed = false;
1692 RecurrenceCycle RC;
1693 if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1694 // Commutes operands of instructions in RC if necessary so that the copy to
1695 // be generated from PHI can be coalesced.
1696 LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1697 for (auto &RI : RC) {
1698 LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1699 auto CP = RI.getCommutePair();
1700 if (CP) {
1701 Changed = true;
1702 TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1703 (*CP).second);
1704 LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1705 }
1706 }
1707 }
1708
1709 return Changed;
1710}
1711
1712PreservedAnalyses
1715 MFPropsModifier _(*this, MF);
1716 auto *DT =
1717 Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
1718 auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
1719 PeepholeOptimizer Impl(DT, MLI);
1720 bool Changed = Impl.run(MF);
1721 if (!Changed)
1722 return PreservedAnalyses::all();
1723
1725 PA.preserve<MachineDominatorTreeAnalysis>();
1726 PA.preserve<MachineLoopAnalysis>();
1727 PA.preserveSet<CFGAnalyses>();
1728 return PA;
1729}
1730
1731bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
1732 if (skipFunction(MF.getFunction()))
1733 return false;
1734 auto *DT = Aggressive
1735 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1736 : nullptr;
1737 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1738 PeepholeOptimizer Impl(DT, MLI);
1739 return Impl.run(MF);
1740}
1741
1742bool PeepholeOptimizer::run(MachineFunction &MF) {
1743
1744 LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1745 LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1746
1747 if (DisablePeephole)
1748 return false;
1749
1750 TII = MF.getSubtarget().getInstrInfo();
1752 MRI = &MF.getRegInfo();
1753 MF.setDelegate(this);
1754
1755 bool Changed = false;
1756
1757 for (MachineBasicBlock &MBB : MF) {
1758 bool SeenMoveImm = false;
1759
1760 // During this forward scan, at some point it needs to answer the question
1761 // "given a pointer to an MI in the current BB, is it located before or
1762 // after the current instruction".
1763 // To perform this, the following set keeps track of the MIs already seen
1764 // during the scan, if a MI is not in the set, it is assumed to be located
1765 // after. Newly created MIs have to be inserted in the set as well.
1767 SmallSet<Register, 4> ImmDefRegs;
1769 SmallSet<Register, 16> FoldAsLoadDefCandidates;
1770
1771 // Track when a non-allocatable physical register is copied to a virtual
1772 // register so that useless moves can be removed.
1773 //
1774 // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1775 // without any intervening re-definition of $physreg.
1776 DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1777
1778 CopySrcMIs.clear();
1779
1780 bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1781
1782 for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1783 MII != MIE;) {
1784 MachineInstr *MI = &*MII;
1785 // We may be erasing MI below, increment MII now.
1786 ++MII;
1787 LocalMIs.insert(MI);
1788
1789 // Skip debug instructions. They should not affect this peephole
1790 // optimization.
1791 if (MI->isDebugInstr())
1792 continue;
1793
1794 if (MI->isPosition())
1795 continue;
1796
1797 if (IsLoopHeader && MI->isPHI()) {
1798 if (optimizeRecurrence(*MI)) {
1799 Changed = true;
1800 continue;
1801 }
1802 }
1803
1804 if (!MI->isCopy()) {
1805 for (const MachineOperand &MO : MI->operands()) {
1806 // Visit all operands: definitions can be implicit or explicit.
1807 if (MO.isReg()) {
1808 Register Reg = MO.getReg();
1809 if (MO.isDef() && isNAPhysCopy(Reg)) {
1810 const auto &Def = NAPhysToVirtMIs.find(Reg);
1811 if (Def != NAPhysToVirtMIs.end()) {
1812 // A new definition of the non-allocatable physical register
1813 // invalidates previous copies.
1815 << "NAPhysCopy: invalidating because of " << *MI);
1816 NAPhysToVirtMIs.erase(Def);
1817 }
1818 }
1819 } else if (MO.isRegMask()) {
1820 const uint32_t *RegMask = MO.getRegMask();
1821 NAPhysToVirtMIs.remove_if([&](const auto &RegMI) {
1822 if (!MachineOperand::clobbersPhysReg(RegMask, RegMI.first))
1823 return false;
1825 << "NAPhysCopy: invalidating because of " << *MI);
1826 return true;
1827 });
1828 }
1829 }
1830 }
1831
1832 if (MI->isImplicitDef() || MI->isKill())
1833 continue;
1834
1835 if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1836 // Blow away all non-allocatable physical registers knowledge since we
1837 // don't know what's correct anymore.
1838 //
1839 // FIXME: handle explicit asm clobbers.
1840 LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1841 << *MI);
1842 NAPhysToVirtMIs.clear();
1843 }
1844
1845 if (MI->isCompare() && optimizeCmpInstr(*MI, MF, LocalMIs)) {
1846 LocalMIs.erase(MI);
1847 Changed = true;
1848 continue;
1849 }
1850
1851 if ((isUncoalescableCopy(*MI) &&
1852 optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1853 (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1854 // MI is deleted.
1855 LocalMIs.erase(MI);
1856 Changed = true;
1857 continue;
1858 }
1859
1860 if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1861 Changed = true;
1862 continue;
1863 }
1864
1865 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1866 // MI is just rewritten.
1867 Changed = true;
1868 continue;
1869 }
1870
1871 if (MI->isCopy() && (foldRedundantCopy(*MI) ||
1872 foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1873 LocalMIs.erase(MI);
1874 LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1875 MI->eraseFromParent();
1876 Changed = true;
1877 continue;
1878 }
1879
1880 if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1881 SeenMoveImm = true;
1882 } else {
1883 Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1884 // optimizeExtInstr might have created new instructions after MI
1885 // and before the already incremented MII. Adjust MII so that the
1886 // next iteration sees the new instructions.
1887 MII = MI;
1888 ++MII;
1889 if (SeenMoveImm) {
1890 bool Deleted;
1891 Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
1892 if (Deleted) {
1893 LocalMIs.erase(MI);
1894 continue;
1895 }
1896 }
1897 }
1898
1899 // Check whether MI is a load candidate for folding into a later
1900 // instruction. If MI is not a candidate, check whether we can fold an
1901 // earlier load into MI.
1902 if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1903 !FoldAsLoadDefCandidates.empty()) {
1904
1905 // We visit each operand even after successfully folding a previous
1906 // one. This allows us to fold multiple loads into a single
1907 // instruction. We do assume that optimizeLoadInstr doesn't insert
1908 // foldable uses earlier in the argument list. Since we don't restart
1909 // iteration, we'd miss such cases.
1910 const MCInstrDesc &MIDesc = MI->getDesc();
1911 for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) {
1912 const MachineOperand &MOp = MI->getOperand(i);
1913 if (!MOp.isReg())
1914 continue;
1915 Register FoldAsLoadDefReg = MOp.getReg();
1916 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1917 // We need to fold load after optimizeCmpInstr, since
1918 // optimizeCmpInstr can enable folding by converting SUB to CMP.
1919 Register FoldedReg = FoldAsLoadDefReg;
1920 if (MachineInstr *FoldMI =
1921 foldLoadInto(MF, *MI, FoldAsLoadDefReg, LocalMIs)) {
1922 FoldAsLoadDefCandidates.erase(FoldedReg);
1923 // MI is replaced with FoldMI so we can continue trying to fold
1924 Changed = true;
1925 MI = FoldMI;
1926 }
1927 }
1928 }
1929 }
1930
1931 // If we run into an instruction we can't fold across, discard
1932 // the load candidates. Note: We might be able to fold *into* this
1933 // instruction, so this needs to be after the folding logic.
1934 if (MI->isLoadFoldBarrier()) {
1935 LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1936 FoldAsLoadDefCandidates.clear();
1937 }
1938 }
1939 }
1940
1941 MF.resetDelegate(this);
1942 return Changed;
1943}
1944
1945ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1946 assert(Def->isCopy() && "Invalid definition");
1947 // Copy instruction are supposed to be: Def = Src.
1948 // If someone breaks this assumption, bad things will happen everywhere.
1949 // There may be implicit uses preventing the copy to be moved across
1950 // some target specific register definitions
1951 assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1952 "Invalid number of operands");
1953 assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1954 assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1955
1956 // Otherwise, we want the whole source.
1957 const MachineOperand &Src = Def->getOperand(1);
1958 if (Src.isUndef())
1959 return ValueTrackerResult();
1960
1961 Register SrcReg = Src.getReg();
1962 unsigned SubReg = Src.getSubReg();
1963 if (DefSubReg) {
1964 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1965 SubReg = TRI->composeSubRegIndices(SubReg, DefSubReg);
1966
1967 if (SrcReg.isVirtual()) {
1968 // TODO: Try constraining on rewrite if we can
1969 const TargetRegisterClass *RegRC = MRI.getRegClass(SrcReg);
1970 if (!TRI->isSubRegValidForRegClass(RegRC, SubReg))
1971 return ValueTrackerResult();
1972 } else {
1973 if (!TRI->getSubReg(SrcReg, SubReg))
1974 return ValueTrackerResult();
1975 }
1976 }
1977
1978 return ValueTrackerResult(SrcReg, SubReg);
1979}
1980
1981ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1982 assert(Def->isBitcast() && "Invalid definition");
1983
1984 // Bail if there are effects that a plain copy will not expose.
1985 if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1986 return ValueTrackerResult();
1987
1988 // Bitcasts with more than one def are not supported.
1989 if (Def->getDesc().getNumDefs() != 1)
1990 return ValueTrackerResult();
1991
1992 assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");
1993
1994 unsigned SrcIdx = Def->getNumOperands();
1995 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1996 ++OpIdx) {
1997 const MachineOperand &MO = Def->getOperand(OpIdx);
1998 if (!MO.isReg() || !MO.getReg())
1999 continue;
2000 // Ignore dead implicit defs.
2001 if (MO.isImplicit() && MO.isDead())
2002 continue;
2003 assert(!MO.isDef() && "We should have skipped all the definitions by now");
2004 if (SrcIdx != EndOpIdx)
2005 // Multiple sources?
2006 return ValueTrackerResult();
2007 SrcIdx = OpIdx;
2008 }
2009
2010 // In some rare case, Def has no input, SrcIdx is out of bound,
2011 // getOperand(SrcIdx) will fail below.
2012 if (SrcIdx >= Def->getNumOperands())
2013 return ValueTrackerResult();
2014
2015 const MachineOperand &DefOp = Def->getOperand(DefIdx);
2016
2017 // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
2018 // will break the assumed guarantees for the upper bits.
2019 for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
2020 if (UseMI.isSubregToReg())
2021 return ValueTrackerResult();
2022 }
2023
2024 const MachineOperand &Src = Def->getOperand(SrcIdx);
2025 if (Src.isUndef())
2026 return ValueTrackerResult();
2027 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
2028}
2029
2030ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
2031 assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
2032 "Invalid definition");
2033
2034 assert(!Def->getOperand(DefIdx).getSubReg() && "illegal subregister def");
2035
2037 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2038 return ValueTrackerResult();
2039
2040 // We are looking at:
2041 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
2042 //
2043 // Check if one of the operands exactly defines the subreg we are interested
2044 // in.
2045 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
2046 if (RegSeqInput.SubIdx == DefSubReg)
2047 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2048 }
2049
2050 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2051
2052 // If we did not find an exact match, see if we can do a composition to
2053 // extract a sub-subregister.
2054 for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
2055 LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
2056 LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
2057
2058 // Check that this extract reads a subset of this single reg_sequence input.
2059 //
2060 // FIXME: We should be able to filter this in terms of the indexes directly
2061 // without checking the lanemasks.
2062 if ((DefMask & ThisOpRegMask) != DefMask)
2063 continue;
2064
2065 unsigned ReverseDefCompose =
2066 TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
2067 if (!ReverseDefCompose)
2068 continue;
2069
2070 unsigned ComposedDefInSrcReg1 =
2071 TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2072
2073 // TODO: We should be able to defer checking if the result register class
2074 // supports the index to continue looking for a rewritable source.
2075 //
2076 // TODO: Should we modify the register class to support the index?
2077 const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
2078 if (!TRI->isSubRegValidForRegClass(SrcRC, ComposedDefInSrcReg1))
2079 return ValueTrackerResult();
2080
2081 return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2082 }
2083
2084 // If the subreg we are tracking is super-defined by another subreg,
2085 // we could follow this value. However, this would require to compose
2086 // the subreg and we do not do that for now.
2087 return ValueTrackerResult();
2088}
2089
2090ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2091 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
2092 "Invalid definition");
2093 assert(!Def->getOperand(DefIdx).getSubReg() && "no subreg defs in SSA");
2094
2096 RegSubRegPairAndIdx InsertedReg;
2097 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2098 return ValueTrackerResult();
2099
2100 // We are looking at:
2101 // Def = INSERT_SUBREG v0, v1, sub1
2102 // There are two cases:
2103 // 1. DefSubReg == sub1, get v1.
2104 // 2. DefSubReg != sub1, the value may be available through v0.
2105
2106 // #1 Check if the inserted register matches the required sub index.
2107 if (InsertedReg.SubIdx == DefSubReg) {
2108 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
2109 }
2110 // #2 Otherwise, if the sub register we are looking for is not partial
2111 // defined by the inserted element, we can look through the main
2112 // register (v0).
2113 const MachineOperand &MODef = Def->getOperand(DefIdx);
2114 // If the result register (Def) and the base register (v0) do not
2115 // have the same register class or if we have to compose
2116 // subregisters, bail out.
2117 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
2118 BaseReg.SubReg)
2119 return ValueTrackerResult();
2120
2121 // Get the TRI and check if the inserted sub-register overlaps with the
2122 // sub-register we are tracking.
2123 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2124 if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
2125 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
2126 .any())
2127 return ValueTrackerResult();
2128 // At this point, the value is available in v0 via the same subreg
2129 // we used for Def.
2130 return ValueTrackerResult(BaseReg.Reg, DefSubReg);
2131}
2132
2133ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2134 assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
2135 "Invalid definition");
2136 // We are looking at:
2137 // Def = EXTRACT_SUBREG v0, sub0
2138
2139 // Bail if we have to compose sub registers.
2140 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
2141 if (DefSubReg)
2142 return ValueTrackerResult();
2143
2144 RegSubRegPairAndIdx ExtractSubregInputReg;
2145 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2146 return ValueTrackerResult();
2147
2148 // Bail if we have to compose sub registers.
2149 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2150 if (ExtractSubregInputReg.SubReg)
2151 return ValueTrackerResult();
2152 // Otherwise, the value is available in the v0.sub0.
2153 return ValueTrackerResult(ExtractSubregInputReg.Reg,
2154 ExtractSubregInputReg.SubIdx);
2155}
2156
2157ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2158 assert(Def->isSubregToReg() && "Invalid definition");
2159 // We are looking at:
2160 // Def = SUBREG_TO_REG v0, sub0
2161
2162 // Bail if we have to compose sub registers.
2163 // If DefSubReg != sub0, we would have to check that all the bits
2164 // we track are included in sub0 and if yes, we would have to
2165 // determine the right subreg in v0.
2166 if (DefSubReg != Def->getOperand(2).getImm())
2167 return ValueTrackerResult();
2168 // Bail if we have to compose sub registers.
2169 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2170 if (Def->getOperand(1).getSubReg())
2171 return ValueTrackerResult();
2172
2173 return ValueTrackerResult(Def->getOperand(1).getReg(),
2174 Def->getOperand(2).getImm());
2175}
2176
2177/// Explore each PHI incoming operand and return its sources.
2178ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2179 assert(Def->isPHI() && "Invalid definition");
2180 ValueTrackerResult Res;
2181
2182 // Return all register sources for PHI instructions.
2183 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2184 const MachineOperand &MO = Def->getOperand(i);
2185 assert(MO.isReg() && "Invalid PHI instruction");
2186 // We have no code to deal with undef operands. They shouldn't happen in
2187 // normal programs anyway.
2188 if (MO.isUndef())
2189 return ValueTrackerResult();
2190 Res.addSource(MO.getReg(), MO.getSubReg());
2191 }
2192
2193 return Res;
2194}
2195
2196ValueTrackerResult ValueTracker::getNextSourceImpl() {
2197 assert(Def && "This method needs a valid definition");
2198
2199 assert(((Def->getOperand(DefIdx).isDef() &&
2200 (DefIdx < Def->getDesc().getNumDefs() ||
2201 Def->getDesc().isVariadic())) ||
2202 Def->getOperand(DefIdx).isImplicit()) &&
2203 "Invalid DefIdx");
2204 if (Def->isCopy())
2205 return getNextSourceFromCopy();
2206 if (Def->isBitcast())
2207 return getNextSourceFromBitcast();
2208 // All the remaining cases involve "complex" instructions.
2209 // Bail if we did not ask for the advanced tracking.
2211 return ValueTrackerResult();
2212 if (Def->isRegSequence() || Def->isRegSequenceLike())
2213 return getNextSourceFromRegSequence();
2214 if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2215 return getNextSourceFromInsertSubreg();
2216 if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2217 return getNextSourceFromExtractSubreg();
2218 if (Def->isSubregToReg())
2219 return getNextSourceFromSubregToReg();
2220 if (Def->isPHI())
2221 return getNextSourceFromPHI();
2222 return ValueTrackerResult();
2223}
2224
2225ValueTrackerResult ValueTracker::getNextSource() {
2226 // If we reach a point where we cannot move up in the use-def chain,
2227 // there is nothing we can get.
2228 if (!Def)
2229 return ValueTrackerResult();
2230
2231 ValueTrackerResult Res = getNextSourceImpl();
2232 if (Res.isValid()) {
2233 // Update definition, definition index, and subregister for the
2234 // next call of getNextSource.
2235 // Update the current register.
2236 bool OneRegSrc = Res.getNumSources() == 1;
2237 if (OneRegSrc)
2238 Reg = Res.getSrcReg(0);
2239 // Update the result before moving up in the use-def chain
2240 // with the instruction containing the last found sources.
2241 Res.setInst(Def);
2242
2243 // If we can still move up in the use-def chain, move to the next
2244 // definition.
2245 if (!Reg.isPhysical() && OneRegSrc) {
2247 if (DI != MRI.def_end()) {
2248 Def = DI->getParent();
2249 DefIdx = DI.getOperandNo();
2250 DefSubReg = Res.getSrcSubReg(0);
2251 } else {
2252 Def = nullptr;
2253 }
2254 return Res;
2255 }
2256 }
2257 // If we end up here, this means we will not be able to find another source
2258 // for the next iteration. Make sure any new call to getNextSource bails out
2259 // early by cutting the use-def chain.
2260 Def = nullptr;
2261 return Res;
2262}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition MD5.cpp:57
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool erase(const KeyT &Val)
Definition DenseMap.h:379
bool remove_if(Predicate Pred)
Remove entries that match the given predicate.
Definition DenseMap.h:395
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void moveAdditionalCallInfo(const MachineInstr *Old, const MachineInstr *New)
Move the call site info from Old to \New call site info.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool canFoldAsLoad(QueryType Type=IgnoreBundle) const
Return true for instructions that can be folded as memory operands in other instructions.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
LLVM_ABI void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
def_iterator def_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
LLVM_ABI bool hasOneNonDBGUser(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
defusechain_iterator< false, true, false, true, false > def_iterator
def_iterator/def_begin/def_end - Walk all defs of the specified register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static def_iterator def_end()
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool empty() const
Definition SmallSet.h:169
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Other
Any other memory.
Definition ModRef.h:68
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.