LLVM 23.0.0git
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a pass that performs load / store related peephole
10/// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMISelLowering.h"
19#include "ARMSubtarget.h"
22#include "Utils/ARMBaseInfo.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/Type.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
60#include "llvm/Support/Debug.h"
63#include <cassert>
64#include <cstddef>
65#include <cstdlib>
66#include <iterator>
67#include <limits>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");
75STATISTIC(NumSTMGened , "Number of stm instructions generated");
76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
86/// This switch disables formation of double/multi instructions that could
87/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
88/// disabled. This can be used to create libraries that are robust even when
89/// users provoke undefined behaviour by supplying misaligned pointers.
90/// \see mayCombineMisaligned()
91static cl::opt<bool>
92AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
94
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
97namespace {
98
99/// Post- register allocation pass the combine load / store instructions to
100/// form ldm / stm instructions.
101struct ARMLoadStoreOpt {
102 const MachineFunction *MF;
103 const TargetInstrInfo *TII;
104 const TargetRegisterInfo *TRI;
105 const ARMSubtarget *STI;
106 const TargetLowering *TL;
107 ARMFunctionInfo *AFI;
109 RegisterClassInfo RegClassInfo;
111 bool LiveRegsValid;
112 bool RegClassInfoValid;
113 bool isThumb1, isThumb2;
114
115 bool runOnMachineFunction(MachineFunction &Fn);
116
117private:
118 /// A set of load/store MachineInstrs with same base register sorted by
119 /// offset.
120 struct MemOpQueueEntry {
122 int Offset; ///< Load/Store offset.
123 unsigned Position; ///< Position as counted from end of basic block.
124
125 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
126 : MI(&MI), Offset(Offset), Position(Position) {}
127 };
128 using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
129
130 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
131 /// merged into a LDM/STM.
132 struct MergeCandidate {
133 /// List of instructions ordered by load/store offset.
135
136 /// Index in Instrs of the instruction being latest in the schedule.
137 unsigned LatestMIIdx;
138
139 /// Index in Instrs of the instruction being earliest in the schedule.
140 unsigned EarliestMIIdx;
141
142 /// Index into the basic block where the merged instruction will be
143 /// inserted. (See MemOpQueueEntry.Position)
144 unsigned InsertPos;
145
146 /// Whether the instructions can be merged into a ldm/stm instruction.
147 bool CanMergeToLSMulti;
148
149 /// Whether the instructions can be merged into a ldrd/strd instruction.
150 bool CanMergeToLSDouble;
151 };
154 SmallVector<MachineInstr *, 4> MergeBaseCandidates;
155
156 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
158 unsigned findFreeReg(const TargetRegisterClass &RegClass);
159 void UpdateBaseRegUses(MachineBasicBlock &MBB,
161 unsigned Base, unsigned WordOffset,
162 ARMCC::CondCodes Pred, unsigned PredReg);
163 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
164 MachineBasicBlock::iterator InsertBefore,
165 int Offset, unsigned Base, bool BaseKill,
166 unsigned Opcode, ARMCC::CondCodes Pred,
167 unsigned PredReg, const DebugLoc &DL,
168 ArrayRef<std::pair<unsigned, bool>> Regs,
170 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
171 MachineBasicBlock::iterator InsertBefore,
172 int Offset, unsigned Base, bool BaseKill,
173 unsigned Opcode, ARMCC::CondCodes Pred,
174 unsigned PredReg, const DebugLoc &DL,
175 ArrayRef<std::pair<unsigned, bool>> Regs,
176 ArrayRef<MachineInstr *> Instrs) const;
177 void FormCandidates(const MemOpQueue &MemOps);
178 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
179 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
181 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
182 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
183 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
184 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
185 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
186 bool CombineMovBx(MachineBasicBlock &MBB);
187};
188
189struct ARMLoadStoreOptLegacy : public MachineFunctionPass {
190 static char ID;
191
192 ARMLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
193
194 bool runOnMachineFunction(MachineFunction &Fn) override;
195
196 MachineFunctionProperties getRequiredProperties() const override {
197 return MachineFunctionProperties().setNoVRegs();
198 }
199
200 StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
201};
202
203char ARMLoadStoreOptLegacy::ID = 0;
204
205} // end anonymous namespace
206
207INITIALIZE_PASS(ARMLoadStoreOptLegacy, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME,
208 false, false)
209
210static bool definesCPSR(const MachineInstr &MI) {
211 for (const auto &MO : MI.operands()) {
212 if (!MO.isReg())
213 continue;
214 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
215 // If the instruction has live CPSR def, then it's not safe to fold it
216 // into load / store.
217 return true;
218 }
219
220 return false;
221}
222
224 unsigned Opcode = MI.getOpcode();
225 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
226 unsigned NumOperands = MI.getDesc().getNumOperands();
227 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
228
229 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
230 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
231 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
232 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
233 return OffField;
234
235 // Thumb1 immediate offsets are scaled by 4
236 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
237 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
238 return OffField * 4;
239
240 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
241 : ARM_AM::getAM5Offset(OffField) * 4;
242 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
243 : ARM_AM::getAM5Op(OffField);
244
245 if (Op == ARM_AM::sub)
246 return -Offset;
247
248 return Offset;
249}
250
252 return MI.getOperand(1);
253}
254
256 return MI.getOperand(0);
257}
258
260 switch (Opcode) {
261 default: llvm_unreachable("Unhandled opcode!");
262 case ARM::LDRi12:
263 ++NumLDMGened;
264 switch (Mode) {
265 default: llvm_unreachable("Unhandled submode!");
266 case ARM_AM::ia: return ARM::LDMIA;
267 case ARM_AM::da: return ARM::LDMDA;
268 case ARM_AM::db: return ARM::LDMDB;
269 case ARM_AM::ib: return ARM::LDMIB;
270 }
271 case ARM::STRi12:
272 ++NumSTMGened;
273 switch (Mode) {
274 default: llvm_unreachable("Unhandled submode!");
275 case ARM_AM::ia: return ARM::STMIA;
276 case ARM_AM::da: return ARM::STMDA;
277 case ARM_AM::db: return ARM::STMDB;
278 case ARM_AM::ib: return ARM::STMIB;
279 }
280 case ARM::tLDRi:
281 case ARM::tLDRspi:
282 // tLDMIA is writeback-only - unless the base register is in the input
283 // reglist.
284 ++NumLDMGened;
285 switch (Mode) {
286 default: llvm_unreachable("Unhandled submode!");
287 case ARM_AM::ia: return ARM::tLDMIA;
288 }
289 case ARM::tSTRi:
290 case ARM::tSTRspi:
291 // There is no non-writeback tSTMIA either.
292 ++NumSTMGened;
293 switch (Mode) {
294 default: llvm_unreachable("Unhandled submode!");
295 case ARM_AM::ia: return ARM::tSTMIA_UPD;
296 }
297 case ARM::t2LDRi8:
298 case ARM::t2LDRi12:
299 ++NumLDMGened;
300 switch (Mode) {
301 default: llvm_unreachable("Unhandled submode!");
302 case ARM_AM::ia: return ARM::t2LDMIA;
303 case ARM_AM::db: return ARM::t2LDMDB;
304 }
305 case ARM::t2STRi8:
306 case ARM::t2STRi12:
307 ++NumSTMGened;
308 switch (Mode) {
309 default: llvm_unreachable("Unhandled submode!");
310 case ARM_AM::ia: return ARM::t2STMIA;
311 case ARM_AM::db: return ARM::t2STMDB;
312 }
313 case ARM::VLDRS:
314 ++NumVLDMGened;
315 switch (Mode) {
316 default: llvm_unreachable("Unhandled submode!");
317 case ARM_AM::ia: return ARM::VLDMSIA;
318 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
319 }
320 case ARM::VSTRS:
321 ++NumVSTMGened;
322 switch (Mode) {
323 default: llvm_unreachable("Unhandled submode!");
324 case ARM_AM::ia: return ARM::VSTMSIA;
325 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
326 }
327 case ARM::VLDRD:
328 ++NumVLDMGened;
329 switch (Mode) {
330 default: llvm_unreachable("Unhandled submode!");
331 case ARM_AM::ia: return ARM::VLDMDIA;
332 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
333 }
334 case ARM::VSTRD:
335 ++NumVSTMGened;
336 switch (Mode) {
337 default: llvm_unreachable("Unhandled submode!");
338 case ARM_AM::ia: return ARM::VSTMDIA;
339 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
340 }
341 }
342}
343
345 switch (Opcode) {
346 default: llvm_unreachable("Unhandled opcode!");
347 case ARM::LDMIA_RET:
348 case ARM::LDMIA:
349 case ARM::LDMIA_UPD:
350 case ARM::STMIA:
351 case ARM::STMIA_UPD:
352 case ARM::tLDMIA:
353 case ARM::tLDMIA_UPD:
354 case ARM::tSTMIA_UPD:
355 case ARM::t2LDMIA_RET:
356 case ARM::t2LDMIA:
357 case ARM::t2LDMIA_UPD:
358 case ARM::t2STMIA:
359 case ARM::t2STMIA_UPD:
360 case ARM::VLDMSIA:
361 case ARM::VLDMSIA_UPD:
362 case ARM::VSTMSIA:
363 case ARM::VSTMSIA_UPD:
364 case ARM::VLDMDIA:
365 case ARM::VLDMDIA_UPD:
366 case ARM::VSTMDIA:
367 case ARM::VSTMDIA_UPD:
368 return ARM_AM::ia;
369
370 case ARM::LDMDA:
371 case ARM::LDMDA_UPD:
372 case ARM::STMDA:
373 case ARM::STMDA_UPD:
374 return ARM_AM::da;
375
376 case ARM::LDMDB:
377 case ARM::LDMDB_UPD:
378 case ARM::STMDB:
379 case ARM::STMDB_UPD:
380 case ARM::t2LDMDB:
381 case ARM::t2LDMDB_UPD:
382 case ARM::t2STMDB:
383 case ARM::t2STMDB_UPD:
384 case ARM::VLDMSDB_UPD:
385 case ARM::VSTMSDB_UPD:
386 case ARM::VLDMDDB_UPD:
387 case ARM::VSTMDDB_UPD:
388 return ARM_AM::db;
389
390 case ARM::LDMIB:
391 case ARM::LDMIB_UPD:
392 case ARM::STMIB:
393 case ARM::STMIB_UPD:
394 return ARM_AM::ib;
395 }
396}
397
398static bool isT1i32Load(unsigned Opc) {
399 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
400}
401
402static bool isT2i32Load(unsigned Opc) {
403 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
404}
405
406static bool isi32Load(unsigned Opc) {
407 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
408}
409
410static bool isT1i32Store(unsigned Opc) {
411 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
412}
413
414static bool isT2i32Store(unsigned Opc) {
415 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
416}
417
418static bool isi32Store(unsigned Opc) {
419 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
420}
421
422static bool isLoadSingle(unsigned Opc) {
423 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
424}
425
426static unsigned getImmScale(unsigned Opc) {
427 switch (Opc) {
428 default: llvm_unreachable("Unhandled opcode!");
429 case ARM::tLDRi:
430 case ARM::tSTRi:
431 case ARM::tLDRspi:
432 case ARM::tSTRspi:
433 return 1;
434 case ARM::tLDRHi:
435 case ARM::tSTRHi:
436 return 2;
437 case ARM::tLDRBi:
438 case ARM::tSTRBi:
439 return 4;
440 }
441}
442
444 switch (MI->getOpcode()) {
445 default: return 0;
446 case ARM::LDRi12:
447 case ARM::STRi12:
448 case ARM::tLDRi:
449 case ARM::tSTRi:
450 case ARM::tLDRspi:
451 case ARM::tSTRspi:
452 case ARM::t2LDRi8:
453 case ARM::t2LDRi12:
454 case ARM::t2STRi8:
455 case ARM::t2STRi12:
456 case ARM::VLDRS:
457 case ARM::VSTRS:
458 return 4;
459 case ARM::VLDRD:
460 case ARM::VSTRD:
461 return 8;
462 case ARM::LDMIA:
463 case ARM::LDMDA:
464 case ARM::LDMDB:
465 case ARM::LDMIB:
466 case ARM::STMIA:
467 case ARM::STMDA:
468 case ARM::STMDB:
469 case ARM::STMIB:
470 case ARM::tLDMIA:
471 case ARM::tLDMIA_UPD:
472 case ARM::tSTMIA_UPD:
473 case ARM::t2LDMIA:
474 case ARM::t2LDMDB:
475 case ARM::t2STMIA:
476 case ARM::t2STMDB:
477 case ARM::VLDMSIA:
478 case ARM::VSTMSIA:
479 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
480 case ARM::VLDMDIA:
481 case ARM::VSTMDIA:
482 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
483 }
484}
485
486/// Update future uses of the base register with the offset introduced
487/// due to writeback. This function only works on Thumb1.
488void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
490 const DebugLoc &DL, unsigned Base,
491 unsigned WordOffset,
492 ARMCC::CondCodes Pred,
493 unsigned PredReg) {
494 assert(isThumb1 && "Can only update base register uses for Thumb1!");
495 // Start updating any instructions with immediate offsets. Insert a SUB before
496 // the first non-updateable instruction (if any).
497 for (; MBBI != MBB.end(); ++MBBI) {
498 bool InsertSub = false;
499 unsigned Opc = MBBI->getOpcode();
500
501 if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
502 int Offset;
503 bool IsLoad =
504 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
505 bool IsStore =
506 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
507
508 if (IsLoad || IsStore) {
509 // Loads and stores with immediate offsets can be updated, but only if
510 // the new offset isn't negative.
511 // The MachineOperand containing the offset immediate is the last one
512 // before predicates.
513 MachineOperand &MO =
514 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
515 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
516 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
517
518 // If storing the base register, it needs to be reset first.
519 Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
520
521 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
522 MO.setImm(Offset);
523 else
524 InsertSub = true;
525 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
526 !definesCPSR(*MBBI)) {
527 // SUBS/ADDS using this register, with a dead def of the CPSR.
528 // Merge it with the update; if the merged offset is too large,
529 // insert a new sub instead.
530 MachineOperand &MO =
531 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
532 Offset = (Opc == ARM::tSUBi8) ?
533 MO.getImm() + WordOffset * 4 :
534 MO.getImm() - WordOffset * 4 ;
535 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
536 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
537 // Offset == 0.
538 MO.setImm(Offset);
539 // The base register has now been reset, so exit early.
540 return;
541 } else {
542 InsertSub = true;
543 }
544 } else {
545 // Can't update the instruction.
546 InsertSub = true;
547 }
548 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
549 // Since SUBS sets the condition flags, we can't place the base reset
550 // after an instruction that has a live CPSR def.
551 // The base register might also contain an argument for a function call.
552 InsertSub = true;
553 }
554
555 if (InsertSub) {
556 // An instruction above couldn't be updated, so insert a sub.
557 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
558 .add(t1CondCodeOp(true))
559 .addReg(Base)
560 .addImm(WordOffset * 4)
561 .addImm(Pred)
562 .addReg(PredReg);
563 return;
564 }
565
566 if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
567 MBBI->definesRegister(Base, /*TRI=*/nullptr))
568 // Register got killed. Stop updating.
569 return;
570 }
571
572 // End of block was reached.
573 if (!MBB.succ_empty()) {
574 // FIXME: Because of a bug, live registers are sometimes missing from
575 // the successor blocks' live-in sets. This means we can't trust that
576 // information and *always* have to reset at the end of a block.
577 // See PR21029.
578 if (MBBI != MBB.end()) --MBBI;
579 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
580 .add(t1CondCodeOp(true))
581 .addReg(Base)
582 .addImm(WordOffset * 4)
583 .addImm(Pred)
584 .addReg(PredReg);
585 }
586}
587
588/// Return the first register of class \p RegClass that is not in \p Regs.
589unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
590 if (!RegClassInfoValid) {
591 RegClassInfo.runOnMachineFunction(*MF);
592 RegClassInfoValid = true;
593 }
594
595 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
596 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
597 return Reg;
598 return 0;
599}
600
601/// Compute live registers just before instruction \p Before (in normal schedule
602/// direction). Computes backwards so multiple queries in the same block must
603/// come in reverse order.
604void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
606 // Initialize if we never queried in this block.
607 if (!LiveRegsValid) {
608 LiveRegs.init(*TRI);
609 LiveRegs.addLiveOuts(MBB);
610 LiveRegPos = MBB.end();
611 LiveRegsValid = true;
612 }
613 // Move backward just before the "Before" position.
614 while (LiveRegPos != Before) {
615 --LiveRegPos;
616 LiveRegs.stepBackward(*LiveRegPos);
617 }
618}
619
620static bool ContainsReg(ArrayRef<std::pair<unsigned, bool>> Regs,
621 unsigned Reg) {
622 for (const std::pair<unsigned, bool> &R : Regs)
623 if (R.first == Reg)
624 return true;
625 return false;
626}
627
628/// Create and insert a LDM or STM with Base as base register and registers in
629/// Regs as the register operands that would be loaded / stored. It returns
630/// true if the transformation is done.
631MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
632 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
633 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
634 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
635 ArrayRef<std::pair<unsigned, bool>> Regs,
637 unsigned NumRegs = Regs.size();
638 assert(NumRegs > 1);
639
640 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
641 // Compute liveness information for that register to make the decision.
642 bool SafeToClobberCPSR = !isThumb1 ||
643 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
645
646 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
647
648 // Exception: If the base register is in the input reglist, Thumb1 LDM is
649 // non-writeback.
650 // It's also not possible to merge an STR of the base register in Thumb1.
651 if (isThumb1 && ContainsReg(Regs, Base)) {
652 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
653 if (Opcode == ARM::tLDRi)
654 Writeback = false;
655 else if (Opcode == ARM::tSTRi)
656 return nullptr;
657 }
658
660 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
661 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
662 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
663
664 if (Offset == 4 && haveIBAndDA) {
666 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
668 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
669 // VLDM/VSTM do not support DB mode without also updating the base reg.
671 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
672 // Check if this is a supported opcode before inserting instructions to
673 // calculate a new base register.
674 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
675
676 // If starting offset isn't zero, insert a MI to materialize a new base.
677 // But only do so if it is cost effective, i.e. merging more than two
678 // loads / stores.
679 if (NumRegs <= 2)
680 return nullptr;
681
682 // On Thumb1, it's not worth materializing a new base register without
683 // clobbering the CPSR (i.e. not using ADDS/SUBS).
684 if (!SafeToClobberCPSR)
685 return nullptr;
686
687 unsigned NewBase;
688 if (isi32Load(Opcode)) {
689 // If it is a load, then just use one of the destination registers
690 // as the new base. Will no longer be writeback in Thumb1.
691 NewBase = Regs[NumRegs-1].first;
692 Writeback = false;
693 } else {
694 // Find a free register that we can use as scratch register.
695 moveLiveRegsBefore(MBB, InsertBefore);
696 // The merged instruction does not exist yet but will use several Regs if
697 // it is a Store.
698 if (!isLoadSingle(Opcode))
699 for (const std::pair<unsigned, bool> &R : Regs)
700 LiveRegs.addReg(R.first);
701
702 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
703 if (NewBase == 0)
704 return nullptr;
705 }
706
707 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
708 : ARM::t2ADDri)
709 : (isThumb1 && Base == ARM::SP)
710 ? ARM::tADDrSPi
711 : (isThumb1 && Offset < 8)
712 ? ARM::tADDi3
713 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
714
715 if (Offset < 0) {
716 // FIXME: There are no Thumb1 load/store instructions with negative
717 // offsets. So the Base != ARM::SP might be unnecessary.
718 Offset = -Offset;
719 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
720 : ARM::t2SUBri)
721 : (isThumb1 && Offset < 8 && Base != ARM::SP)
722 ? ARM::tSUBi3
723 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
724 }
725
726 if (!TL->isLegalAddImmediate(Offset))
727 // FIXME: Try add with register operand?
728 return nullptr; // Probably not worth it then.
729
730 // We can only append a kill flag to the add/sub input if the value is not
731 // used in the register list of the stm as well.
732 bool KillOldBase = BaseKill &&
733 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
734
735 if (isThumb1) {
736 // Thumb1: depending on immediate size, use either
737 // ADDS NewBase, Base, #imm3
738 // or
739 // MOV NewBase, Base
740 // ADDS NewBase, #imm8.
741 if (Base != NewBase &&
742 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
743 // Need to insert a MOV to the new base first.
744 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
745 !STI->hasV6Ops()) {
746 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
747 if (Pred != ARMCC::AL)
748 return nullptr;
749 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
750 .addReg(Base, getKillRegState(KillOldBase));
751 } else
752 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
753 .addReg(Base, getKillRegState(KillOldBase))
754 .add(predOps(Pred, PredReg));
755
756 // The following ADDS/SUBS becomes an update.
757 Base = NewBase;
758 KillOldBase = true;
759 }
760 if (BaseOpc == ARM::tADDrSPi) {
761 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
762 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
763 .addReg(Base, getKillRegState(KillOldBase))
764 .addImm(Offset / 4)
765 .add(predOps(Pred, PredReg));
766 } else
767 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
768 .add(t1CondCodeOp(true))
769 .addReg(Base, getKillRegState(KillOldBase))
770 .addImm(Offset)
771 .add(predOps(Pred, PredReg));
772 } else {
773 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
774 .addReg(Base, getKillRegState(KillOldBase))
775 .addImm(Offset)
776 .add(predOps(Pred, PredReg))
777 .add(condCodeOp());
778 }
779 Base = NewBase;
780 BaseKill = true; // New base is always killed straight away.
781 }
782
783 bool isDef = isLoadSingle(Opcode);
784
785 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
786 // base register writeback.
787 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
788 if (!Opcode)
789 return nullptr;
790
791 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
792 // - There is no writeback (LDM of base register),
793 // - the base register is killed by the merged instruction,
794 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
795 // to reset the base register.
796 // Otherwise, don't merge.
797 // It's safe to return here since the code to materialize a new base register
798 // above is also conditional on SafeToClobberCPSR.
799 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
800 return nullptr;
801
802 MachineInstrBuilder MIB;
803
804 if (Writeback) {
805 assert(isThumb1 && "expected Writeback only inThumb1");
806 if (Opcode == ARM::tLDMIA) {
807 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
808 // Update tLDMIA with writeback if necessary.
809 Opcode = ARM::tLDMIA_UPD;
810 }
811
812 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
813
814 // Thumb1: we might need to set base writeback when building the MI.
815 MIB.addReg(Base, getDefRegState(true))
816 .addReg(Base, getKillRegState(BaseKill));
817
818 // The base isn't dead after a merged instruction with writeback.
819 // Insert a sub instruction after the newly formed instruction to reset.
820 if (!BaseKill)
821 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
822 } else {
823 // No writeback, simply build the MachineInstr.
824 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
825 MIB.addReg(Base, getKillRegState(BaseKill));
826 }
827
828 MIB.addImm(Pred).addReg(PredReg);
829
830 for (const std::pair<unsigned, bool> &R : Regs)
831 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
832
833 MIB.cloneMergedMemRefs(Instrs);
834
835 return MIB.getInstr();
836}
837
838MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
839 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
840 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
841 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
842 ArrayRef<std::pair<unsigned, bool>> Regs,
843 ArrayRef<MachineInstr*> Instrs) const {
844 bool IsLoad = isi32Load(Opcode);
845 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
846 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
847
848 assert(Regs.size() == 2);
849 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
850 TII->get(LoadStoreOpcode));
851 if (IsLoad) {
852 MIB.addReg(Regs[0].first, RegState::Define)
853 .addReg(Regs[1].first, RegState::Define);
854 } else {
855 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
856 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
857 }
858 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
859 MIB.cloneMergedMemRefs(Instrs);
860 return MIB.getInstr();
861}
862
863/// Call MergeOps and update MemOps and merges accordingly on success.
864MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
865 const MachineInstr *First = Cand.Instrs.front();
866 unsigned Opcode = First->getOpcode();
867 bool IsLoad = isLoadSingle(Opcode);
869 SmallVector<unsigned, 4> ImpDefs;
870 DenseSet<unsigned> KilledRegs;
871 DenseSet<unsigned> UsedRegs;
872 // Determine list of registers and list of implicit super-register defs.
873 for (const MachineInstr *MI : Cand.Instrs) {
874 const MachineOperand &MO = getLoadStoreRegOp(*MI);
875 Register Reg = MO.getReg();
876 bool IsKill = MO.isKill();
877 if (IsKill)
878 KilledRegs.insert(Reg);
879 Regs.push_back(std::make_pair(Reg, IsKill));
880 UsedRegs.insert(Reg);
881
882 if (IsLoad) {
883 // Collect any implicit defs of super-registers, after merging we can't
884 // be sure anymore that we properly preserved these live ranges and must
885 // removed these implicit operands.
886 for (const MachineOperand &MO : MI->implicit_operands()) {
887 if (!MO.isReg() || !MO.isDef() || MO.isDead())
888 continue;
889 assert(MO.isImplicit());
890 Register DefReg = MO.getReg();
891
892 if (is_contained(ImpDefs, DefReg))
893 continue;
894 // We can ignore cases where the super-reg is read and written.
895 if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
896 continue;
897 ImpDefs.push_back(DefReg);
898 }
899 }
900 }
901
902 // Attempt the merge.
904
905 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
906 iterator InsertBefore = std::next(iterator(LatestMI));
907 MachineBasicBlock &MBB = *LatestMI->getParent();
908 unsigned Offset = getMemoryOpOffset(*First);
910 bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
911 Register PredReg;
912 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
913 DebugLoc DL = First->getDebugLoc();
914 MachineInstr *Merged = nullptr;
915 if (Cand.CanMergeToLSDouble)
916 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
917 Opcode, Pred, PredReg, DL, Regs,
918 Cand.Instrs);
919 if (!Merged && Cand.CanMergeToLSMulti)
920 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
921 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
922 if (!Merged)
923 return nullptr;
924
925 // Determine earliest instruction that will get removed. We then keep an
926 // iterator just above it so the following erases don't invalidated it.
927 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
928 bool EarliestAtBegin = false;
929 if (EarliestI == MBB.begin()) {
930 EarliestAtBegin = true;
931 } else {
932 EarliestI = std::prev(EarliestI);
933 }
934
935 // Remove instructions which have been merged.
936 for (MachineInstr *MI : Cand.Instrs)
937 MBB.erase(MI);
938
939 // Determine range between the earliest removed instruction and the new one.
940 if (EarliestAtBegin)
941 EarliestI = MBB.begin();
942 else
943 EarliestI = std::next(EarliestI);
944 auto FixupRange = make_range(EarliestI, iterator(Merged));
945
946 if (isLoadSingle(Opcode)) {
947 // If the previous loads defined a super-reg, then we have to mark earlier
948 // operands undef; Replicate the super-reg def on the merged instruction.
949 for (MachineInstr &MI : FixupRange) {
950 for (unsigned &ImpDefReg : ImpDefs) {
951 for (MachineOperand &MO : MI.implicit_operands()) {
952 if (!MO.isReg() || MO.getReg() != ImpDefReg)
953 continue;
954 if (MO.readsReg())
955 MO.setIsUndef();
956 else if (MO.isDef())
957 ImpDefReg = 0;
958 }
959 }
960 }
961
962 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
963 for (unsigned ImpDef : ImpDefs)
964 MIB.addReg(ImpDef, RegState::ImplicitDefine);
965 } else {
966 // Remove kill flags: We are possibly storing the values later now.
967 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
968 for (MachineInstr &MI : FixupRange) {
969 for (MachineOperand &MO : MI.uses()) {
970 if (!MO.isReg() || !MO.isKill())
971 continue;
972 if (UsedRegs.count(MO.getReg()))
973 MO.setIsKill(false);
974 }
975 }
976 assert(ImpDefs.empty());
977 }
978
979 return Merged;
980}
981
/// Return true if \p Offset is an immediate that can be encoded by
/// t2LDRDi8/t2STRDi8.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
988
989/// Return true for loads/stores that can be combined to a double/multi
990/// operation without increasing the requirements for alignment.
992 const MachineInstr &MI) {
993 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
994 // difference.
995 unsigned Opcode = MI.getOpcode();
996 if (!isi32Load(Opcode) && !isi32Store(Opcode))
997 return true;
998
999 // Stack pointer alignment is out of the programmers control so we can trust
1000 // SP-relative loads/stores.
1001 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
1003 return true;
1004 return false;
1005}
1006
1007/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
1008void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
1009 const MachineInstr *FirstMI = MemOps[0].MI;
1010 unsigned Opcode = FirstMI->getOpcode();
1011 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
1012 unsigned Size = getLSMultipleTransferSize(FirstMI);
1013
1014 unsigned SIndex = 0;
1015 unsigned EIndex = MemOps.size();
1016 do {
1017 // Look at the first instruction.
1018 const MachineInstr *MI = MemOps[SIndex].MI;
1019 int Offset = MemOps[SIndex].Offset;
1020 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1021 Register PReg = PMO.getReg();
1022 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1023 : TRI->getEncodingValue(PReg);
1024 unsigned Latest = SIndex;
1025 unsigned Earliest = SIndex;
1026 unsigned Count = 1;
1027 bool CanMergeToLSDouble =
1028 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1029 // ARM errata 602117: LDRD with base in list may result in incorrect base
1030 // register when interrupted or faulted.
1031 if (STI->isCortexM3() && isi32Load(Opcode) &&
1032 PReg == getLoadStoreBaseOp(*MI).getReg())
1033 CanMergeToLSDouble = false;
1034
1035 bool CanMergeToLSMulti = true;
1036 // On swift vldm/vstm starting with an odd register number as that needs
1037 // more uops than single vldrs.
1038 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1039 CanMergeToLSMulti = false;
1040
1041 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1042 // deprecated; LDM to PC is fine but cannot happen here.
1043 if (PReg == ARM::SP || PReg == ARM::PC)
1044 CanMergeToLSMulti = CanMergeToLSDouble = false;
1045
1046 // Should we be conservative?
1048 CanMergeToLSMulti = CanMergeToLSDouble = false;
1049
1050 // vldm / vstm limit are 32 for S variants, 16 for D variants.
1051 unsigned Limit;
1052 switch (Opcode) {
1053 default:
1054 Limit = UINT_MAX;
1055 break;
1056 case ARM::VLDRD:
1057 case ARM::VSTRD:
1058 Limit = 16;
1059 break;
1060 }
1061
1062 // Merge following instructions where possible.
1063 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1064 int NewOffset = MemOps[I].Offset;
1065 if (NewOffset != Offset + (int)Size)
1066 break;
1067 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1068 Register Reg = MO.getReg();
1069 if (Reg == ARM::SP || Reg == ARM::PC)
1070 break;
1071 if (Count == Limit)
1072 break;
1073
1074 // See if the current load/store may be part of a multi load/store.
1075 unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1076 : TRI->getEncodingValue(Reg);
1077 bool PartOfLSMulti = CanMergeToLSMulti;
1078 if (PartOfLSMulti) {
1079 // Register numbers must be in ascending order.
1080 if (RegNum <= PRegNum)
1081 PartOfLSMulti = false;
1082 // For VFP / NEON load/store multiples, the registers must be
1083 // consecutive and within the limit on the number of registers per
1084 // instruction.
1085 else if (!isNotVFP && RegNum != PRegNum+1)
1086 PartOfLSMulti = false;
1087 }
1088 // See if the current load/store may be part of a double load/store.
1089 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1090
1091 if (!PartOfLSMulti && !PartOfLSDouble)
1092 break;
1093 CanMergeToLSMulti &= PartOfLSMulti;
1094 CanMergeToLSDouble &= PartOfLSDouble;
1095 // Track MemOp with latest and earliest position (Positions are
1096 // counted in reverse).
1097 unsigned Position = MemOps[I].Position;
1098 if (Position < MemOps[Latest].Position)
1099 Latest = I;
1100 else if (Position > MemOps[Earliest].Position)
1101 Earliest = I;
1102 // Prepare for next MemOp.
1103 Offset += Size;
1104 PRegNum = RegNum;
1105 }
1106
1107 // Form a candidate from the Ops collected so far.
1108 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1109 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1110 Candidate->Instrs.push_back(MemOps[C].MI);
1111 Candidate->LatestMIIdx = Latest - SIndex;
1112 Candidate->EarliestMIIdx = Earliest - SIndex;
1113 Candidate->InsertPos = MemOps[Latest].Position;
1114 if (Count == 1)
1115 CanMergeToLSMulti = CanMergeToLSDouble = false;
1116 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1117 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1118 Candidates.push_back(Candidate);
1119 // Continue after the chain.
1120 SIndex += Count;
1121 } while (SIndex < EIndex);
1122}
1123
1124static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1126 switch (Opc) {
1127 default: llvm_unreachable("Unhandled opcode!");
1128 case ARM::LDMIA:
1129 case ARM::LDMDA:
1130 case ARM::LDMDB:
1131 case ARM::LDMIB:
1132 switch (Mode) {
1133 default: llvm_unreachable("Unhandled submode!");
1134 case ARM_AM::ia: return ARM::LDMIA_UPD;
1135 case ARM_AM::ib: return ARM::LDMIB_UPD;
1136 case ARM_AM::da: return ARM::LDMDA_UPD;
1137 case ARM_AM::db: return ARM::LDMDB_UPD;
1138 }
1139 case ARM::STMIA:
1140 case ARM::STMDA:
1141 case ARM::STMDB:
1142 case ARM::STMIB:
1143 switch (Mode) {
1144 default: llvm_unreachable("Unhandled submode!");
1145 case ARM_AM::ia: return ARM::STMIA_UPD;
1146 case ARM_AM::ib: return ARM::STMIB_UPD;
1147 case ARM_AM::da: return ARM::STMDA_UPD;
1148 case ARM_AM::db: return ARM::STMDB_UPD;
1149 }
1150 case ARM::t2LDMIA:
1151 case ARM::t2LDMDB:
1152 switch (Mode) {
1153 default: llvm_unreachable("Unhandled submode!");
1154 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1155 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1156 }
1157 case ARM::t2STMIA:
1158 case ARM::t2STMDB:
1159 switch (Mode) {
1160 default: llvm_unreachable("Unhandled submode!");
1161 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1162 case ARM_AM::db: return ARM::t2STMDB_UPD;
1163 }
1164 case ARM::VLDMSIA:
1165 switch (Mode) {
1166 default: llvm_unreachable("Unhandled submode!");
1167 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1168 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1169 }
1170 case ARM::VLDMDIA:
1171 switch (Mode) {
1172 default: llvm_unreachable("Unhandled submode!");
1173 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1174 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1175 }
1176 case ARM::VSTMSIA:
1177 switch (Mode) {
1178 default: llvm_unreachable("Unhandled submode!");
1179 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1180 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1181 }
1182 case ARM::VSTMDIA:
1183 switch (Mode) {
1184 default: llvm_unreachable("Unhandled submode!");
1185 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1186 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1187 }
1188 }
1189}
1190
1191/// Check if the given instruction increments or decrements a register and
1192/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1193/// generated by the instruction are possibly read as well.
1195 ARMCC::CondCodes Pred, Register PredReg) {
1196 bool CheckCPSRDef;
1197 int Scale;
1198 switch (MI.getOpcode()) {
1199 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1200 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1201 case ARM::t2SUBri:
1202 case ARM::t2SUBspImm:
1203 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1204 case ARM::t2ADDri:
1205 case ARM::t2ADDspImm:
1206 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1207 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1208 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1209 default: return 0;
1210 }
1211
1212 Register MIPredReg;
1213 if (MI.getOperand(0).getReg() != Reg ||
1214 MI.getOperand(1).getReg() != Reg ||
1215 getInstrPredicate(MI, MIPredReg) != Pred ||
1216 MIPredReg != PredReg)
1217 return 0;
1218
1219 if (CheckCPSRDef && definesCPSR(MI))
1220 return 0;
1221 return MI.getOperand(2).getImm() * Scale;
1222}
1223
1224/// Searches for an increment or decrement of \p Reg before \p MBBI.
1227 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
1228 Offset = 0;
1229 MachineBasicBlock &MBB = *MBBI->getParent();
1230 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1231 MachineBasicBlock::iterator EndMBBI = MBB.end();
1232 if (MBBI == BeginMBBI)
1233 return EndMBBI;
1234
1235 // Skip debug values.
1236 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1237 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1238 --PrevMBBI;
1239
1240 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1241 return Offset == 0 ? EndMBBI : PrevMBBI;
1242}
1243
1244/// Searches for a increment or decrement of \p Reg after \p MBBI.
1247 ARMCC::CondCodes Pred, Register PredReg, int &Offset,
1248 const TargetRegisterInfo *TRI) {
1249 Offset = 0;
1250 MachineBasicBlock &MBB = *MBBI->getParent();
1251 MachineBasicBlock::iterator EndMBBI = MBB.end();
1252 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1253 while (NextMBBI != EndMBBI) {
1254 // Skip debug values.
1255 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1256 ++NextMBBI;
1257 if (NextMBBI == EndMBBI)
1258 return EndMBBI;
1259
1260 unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1261 if (Off) {
1262 Offset = Off;
1263 return NextMBBI;
1264 }
1265
1266 // SP can only be combined if it is the next instruction after the original
1267 // MBBI, otherwise we may be incrementing the stack pointer (invalidating
1268 // anything below the new pointer) when its frame elements are still in
1269 // use. Other registers can attempt to look further, until a different use
1270 // or def of the register is found.
1271 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1272 NextMBBI->definesRegister(Reg, TRI))
1273 return EndMBBI;
1274
1275 ++NextMBBI;
1276 }
1277 return EndMBBI;
1278}
1279
1280/// Fold proceeding/trailing inc/dec of base register into the
1281/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1282///
1283/// stmia rn, <ra, rb, rc>
1284/// rn := rn + 4 * 3;
1285/// =>
1286/// stmia rn!, <ra, rb, rc>
1287///
1288/// rn := rn - 4 * 3;
1289/// ldmia rn, <ra, rb, rc>
1290/// =>
1291/// ldmdb rn!, <ra, rb, rc>
1292bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1293 // Thumb1 is already using updating loads/stores.
1294 if (isThumb1) return false;
1295 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1296
1297 const MachineOperand &BaseOP = MI->getOperand(0);
1298 Register Base = BaseOP.getReg();
1299 bool BaseKill = BaseOP.isKill();
1300 Register PredReg;
1301 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1302 unsigned Opcode = MI->getOpcode();
1303 DebugLoc DL = MI->getDebugLoc();
1304
1305 // Can't use an updating ld/st if the base register is also a dest
1306 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1307 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1308 if (MO.getReg() == Base)
1309 return false;
1310
1311 int Bytes = getLSMultipleTransferSize(MI);
1312 MachineBasicBlock &MBB = *MI->getParent();
1314 int Offset;
1316 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1318 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1319 Mode = ARM_AM::db;
1320 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1321 Mode = ARM_AM::da;
1322 } else {
1323 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1324 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1325 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1326
1327 // We couldn't find an inc/dec to merge. But if the base is dead, we
1328 // can still change to a writeback form as that will save us 2 bytes
1329 // of code size. It can create WAW hazards though, so only do it if
1330 // we're minimizing code size.
1331 if (!STI->hasMinSize() || !BaseKill)
1332 return false;
1333
1334 bool HighRegsUsed = false;
1335 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1336 if (MO.getReg() >= ARM::R8) {
1337 HighRegsUsed = true;
1338 break;
1339 }
1340
1341 if (!HighRegsUsed)
1342 MergeInstr = MBB.end();
1343 else
1344 return false;
1345 }
1346 }
1347 if (MergeInstr != MBB.end()) {
1348 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1349 MBB.erase(MergeInstr);
1350 }
1351
1352 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1353 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1354 .addReg(Base, getDefRegState(true)) // WB base register
1355 .addReg(Base, getKillRegState(BaseKill))
1356 .addImm(Pred).addReg(PredReg);
1357
1358 // Transfer the rest of operands.
1359 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
1360 MIB.add(MO);
1361
1362 // Transfer memoperands.
1363 MIB.setMemRefs(MI->memoperands());
1364
1365 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1366 MBB.erase(MBBI);
1367 return true;
1368}
1369
1370static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1372 switch (Opc) {
1373 case ARM::LDRi12:
1374 return ARM::LDR_PRE_IMM;
1375 case ARM::STRi12:
1376 return ARM::STR_PRE_IMM;
1377 case ARM::VLDRS:
1378 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1379 case ARM::VLDRD:
1380 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1381 case ARM::VSTRS:
1382 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1383 case ARM::VSTRD:
1384 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1385 case ARM::t2LDRi8:
1386 case ARM::t2LDRi12:
1387 return ARM::t2LDR_PRE;
1388 case ARM::t2STRi8:
1389 case ARM::t2STRi12:
1390 return ARM::t2STR_PRE;
1391 default: llvm_unreachable("Unhandled opcode!");
1392 }
1393}
1394
1395static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1397 switch (Opc) {
1398 case ARM::LDRi12:
1399 return ARM::LDR_POST_IMM;
1400 case ARM::STRi12:
1401 return ARM::STR_POST_IMM;
1402 case ARM::VLDRS:
1403 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1404 case ARM::VLDRD:
1405 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1406 case ARM::VSTRS:
1407 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1408 case ARM::VSTRD:
1409 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1410 case ARM::t2LDRi8:
1411 case ARM::t2LDRi12:
1412 return ARM::t2LDR_POST;
1413 case ARM::t2LDRBi8:
1414 case ARM::t2LDRBi12:
1415 return ARM::t2LDRB_POST;
1416 case ARM::t2LDRSBi8:
1417 case ARM::t2LDRSBi12:
1418 return ARM::t2LDRSB_POST;
1419 case ARM::t2LDRHi8:
1420 case ARM::t2LDRHi12:
1421 return ARM::t2LDRH_POST;
1422 case ARM::t2LDRSHi8:
1423 case ARM::t2LDRSHi12:
1424 return ARM::t2LDRSH_POST;
1425 case ARM::t2STRi8:
1426 case ARM::t2STRi12:
1427 return ARM::t2STR_POST;
1428 case ARM::t2STRBi8:
1429 case ARM::t2STRBi12:
1430 return ARM::t2STRB_POST;
1431 case ARM::t2STRHi8:
1432 case ARM::t2STRHi12:
1433 return ARM::t2STRH_POST;
1434
1435 case ARM::MVE_VLDRBS16:
1436 return ARM::MVE_VLDRBS16_post;
1437 case ARM::MVE_VLDRBS32:
1438 return ARM::MVE_VLDRBS32_post;
1439 case ARM::MVE_VLDRBU16:
1440 return ARM::MVE_VLDRBU16_post;
1441 case ARM::MVE_VLDRBU32:
1442 return ARM::MVE_VLDRBU32_post;
1443 case ARM::MVE_VLDRHS32:
1444 return ARM::MVE_VLDRHS32_post;
1445 case ARM::MVE_VLDRHU32:
1446 return ARM::MVE_VLDRHU32_post;
1447 case ARM::MVE_VLDRBU8:
1448 return ARM::MVE_VLDRBU8_post;
1449 case ARM::MVE_VLDRHU16:
1450 return ARM::MVE_VLDRHU16_post;
1451 case ARM::MVE_VLDRWU32:
1452 return ARM::MVE_VLDRWU32_post;
1453 case ARM::MVE_VSTRB16:
1454 return ARM::MVE_VSTRB16_post;
1455 case ARM::MVE_VSTRB32:
1456 return ARM::MVE_VSTRB32_post;
1457 case ARM::MVE_VSTRH32:
1458 return ARM::MVE_VSTRH32_post;
1459 case ARM::MVE_VSTRBU8:
1460 return ARM::MVE_VSTRBU8_post;
1461 case ARM::MVE_VSTRHU16:
1462 return ARM::MVE_VSTRHU16_post;
1463 case ARM::MVE_VSTRWU32:
1464 return ARM::MVE_VSTRWU32_post;
1465
1466 default: llvm_unreachable("Unhandled opcode!");
1467 }
1468}
1469
1470/// Fold proceeding/trailing inc/dec of base register into the
1471/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1472bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1473 // Thumb1 doesn't have updating LDR/STR.
1474 // FIXME: Use LDM/STM with single register instead.
1475 if (isThumb1) return false;
1476 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1477
1479 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1480 unsigned Opcode = MI->getOpcode();
1481 DebugLoc DL = MI->getDebugLoc();
1482 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1483 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1484 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1485 if (isi32Load(Opcode) || isi32Store(Opcode))
1486 if (MI->getOperand(2).getImm() != 0)
1487 return false;
1488 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1489 return false;
1490
1491 // Can't do the merge if the destination register is the same as the would-be
1492 // writeback register.
1493 if (MI->getOperand(0).getReg() == Base)
1494 return false;
1495
1496 Register PredReg;
1497 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1498 int Bytes = getLSMultipleTransferSize(MI);
1499 MachineBasicBlock &MBB = *MI->getParent();
1501 int Offset;
1503 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1504 unsigned NewOpc;
1505 if (!isAM5 && Offset == Bytes) {
1506 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1507 } else if (Offset == -Bytes) {
1508 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1509 } else {
1510 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1511 if (MergeInstr == MBB.end())
1512 return false;
1513
1515 if ((isAM5 && Offset != Bytes) ||
1516 (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
1518 if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
1519 return false;
1520 }
1521 }
1522 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1523 MBB.erase(MergeInstr);
1524
1526
1527 bool isLd = isLoadSingle(Opcode);
1528 if (isAM5) {
1529 // VLDM[SD]_UPD, VSTM[SD]_UPD
1530 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1531 // updating load/store-multiple instructions can be used with only one
1532 // register.)
1533 MachineOperand &MO = MI->getOperand(0);
1534 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1535 .addReg(Base, getDefRegState(true)) // WB base register
1536 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1537 .addImm(Pred)
1538 .addReg(PredReg)
1539 .addReg(MO.getReg(), (isLd ? getDefRegState(true)
1540 : getKillRegState(MO.isKill())))
1541 .cloneMemRefs(*MI);
1542 (void)MIB;
1543 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1544 } else if (isLd) {
1545 if (isAM2) {
1546 // LDR_PRE, LDR_POST
1547 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1548 auto MIB =
1549 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1550 .addReg(Base, RegState::Define)
1551 .addReg(Base)
1552 .addImm(Offset)
1553 .addImm(Pred)
1554 .addReg(PredReg)
1555 .cloneMemRefs(*MI);
1556 (void)MIB;
1557 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1558 } else {
1560 auto MIB =
1561 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1562 .addReg(Base, RegState::Define)
1563 .addReg(Base)
1564 .addReg(0)
1565 .addImm(Imm)
1566 .add(predOps(Pred, PredReg))
1567 .cloneMemRefs(*MI);
1568 (void)MIB;
1569 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1570 }
1571 } else {
1572 // t2LDR_PRE, t2LDR_POST
1573 auto MIB =
1574 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1575 .addReg(Base, RegState::Define)
1576 .addReg(Base)
1577 .addImm(Offset)
1578 .add(predOps(Pred, PredReg))
1579 .cloneMemRefs(*MI);
1580 (void)MIB;
1581 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1582 }
1583 } else {
1584 MachineOperand &MO = MI->getOperand(0);
1585 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1586 // the vestigial zero-reg offset register. When that's fixed, this clause
1587 // can be removed entirely.
1588 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1590 // STR_PRE, STR_POST
1591 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1592 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1593 .addReg(Base)
1594 .addReg(0)
1595 .addImm(Imm)
1596 .add(predOps(Pred, PredReg))
1597 .cloneMemRefs(*MI);
1598 (void)MIB;
1599 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1600 } else {
1601 // t2STR_PRE, t2STR_POST
1602 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1603 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1604 .addReg(Base)
1605 .addImm(Offset)
1606 .add(predOps(Pred, PredReg))
1607 .cloneMemRefs(*MI);
1608 (void)MIB;
1609 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1610 }
1611 }
1612 MBB.erase(MBBI);
1613
1614 return true;
1615}
1616
1617bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1618 unsigned Opcode = MI.getOpcode();
1619 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1620 "Must have t2STRDi8 or t2LDRDi8");
1621 if (MI.getOperand(3).getImm() != 0)
1622 return false;
1623 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1624
1625 // Behaviour for writeback is undefined if base register is the same as one
1626 // of the others.
1627 const MachineOperand &BaseOp = MI.getOperand(2);
1628 Register Base = BaseOp.getReg();
1629 const MachineOperand &Reg0Op = MI.getOperand(0);
1630 const MachineOperand &Reg1Op = MI.getOperand(1);
1631 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1632 return false;
1633
1634 Register PredReg;
1635 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1637 MachineBasicBlock &MBB = *MI.getParent();
1638 int Offset;
1640 PredReg, Offset);
1641 unsigned NewOpc;
1642 if (Offset == 8 || Offset == -8) {
1643 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1644 } else {
1645 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1646 if (MergeInstr == MBB.end())
1647 return false;
1648 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1649 if (!isLegalAddressImm(NewOpc, Offset, TII))
1650 return false;
1651 }
1652 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1653 MBB.erase(MergeInstr);
1654
1655 DebugLoc DL = MI.getDebugLoc();
1656 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
1657 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1658 MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1659 } else {
1660 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1661 MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1662 }
1663 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1664 .addImm(Offset).addImm(Pred).addReg(PredReg);
1665 assert(TII->get(Opcode).getNumOperands() == 6 &&
1666 TII->get(NewOpc).getNumOperands() == 7 &&
1667 "Unexpected number of operands in Opcode specification.");
1668
1669 // Transfer implicit operands.
1670 for (const MachineOperand &MO : MI.implicit_operands())
1671 MIB.add(MO);
1672 MIB.cloneMemRefs(MI);
1673
1674 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1675 MBB.erase(MBBI);
1676 return true;
1677}
1678
1679/// Returns true if instruction is a memory operation that this pass is capable
1680/// of operating on.
1681static bool isMemoryOp(const MachineInstr &MI) {
1682 unsigned Opcode = MI.getOpcode();
1683 switch (Opcode) {
1684 case ARM::VLDRS:
1685 case ARM::VSTRS:
1686 case ARM::VLDRD:
1687 case ARM::VSTRD:
1688 case ARM::LDRi12:
1689 case ARM::STRi12:
1690 case ARM::tLDRi:
1691 case ARM::tSTRi:
1692 case ARM::tLDRspi:
1693 case ARM::tSTRspi:
1694 case ARM::t2LDRi8:
1695 case ARM::t2LDRi12:
1696 case ARM::t2STRi8:
1697 case ARM::t2STRi12:
1698 break;
1699 default:
1700 return false;
1701 }
1702 if (!MI.getOperand(1).isReg())
1703 return false;
1704
1705 // When no memory operands are present, conservatively assume unaligned,
1706 // volatile, unfoldable.
1707 if (!MI.hasOneMemOperand())
1708 return false;
1709
1710 const MachineMemOperand &MMO = **MI.memoperands_begin();
1711
1712 // Don't touch volatile memory accesses - we may be changing their order.
1713 // TODO: We could allow unordered and monotonic atomics here, but we need to
1714 // make sure the resulting ldm/stm is correctly marked as atomic.
1715 if (MMO.isVolatile() || MMO.isAtomic())
1716 return false;
1717
1718 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1719 // not.
1720 if (MMO.getAlign() < Align(4))
1721 return false;
1722
1723 // str <undef> could probably be eliminated entirely, but for now we just want
1724 // to avoid making a mess of it.
1725 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1726 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1727 return false;
1728
1729 // Likewise don't mess with references to undefined addresses.
1730 if (MI.getOperand(1).isUndef())
1731 return false;
1732
1733 return true;
1734}
1735
1738 bool isDef, unsigned NewOpc, unsigned Reg,
1739 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1740 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1741 unsigned PredReg, const TargetInstrInfo *TII,
1742 MachineInstr *MI) {
1743 if (isDef) {
1744 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1745 TII->get(NewOpc))
1746 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1747 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1748 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1749 // FIXME: This is overly conservative; the new instruction accesses 4
1750 // bytes, not 8.
1751 MIB.cloneMemRefs(*MI);
1752 } else {
1753 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1754 TII->get(NewOpc))
1755 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1756 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1757 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1758 // FIXME: This is overly conservative; the new instruction accesses 4
1759 // bytes, not 8.
1760 MIB.cloneMemRefs(*MI);
1761 }
1762}
1763
1764bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1766 MachineInstr *MI = &*MBBI;
1767 unsigned Opcode = MI->getOpcode();
1768 // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1769 // if we see this opcode.
1770 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1771 return false;
1772
1773 const MachineOperand &BaseOp = MI->getOperand(2);
1774 Register BaseReg = BaseOp.getReg();
1775 Register EvenReg = MI->getOperand(0).getReg();
1776 Register OddReg = MI->getOperand(1).getReg();
1777 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1778 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1779
1780 // ARM errata 602117: LDRD with base in list may result in incorrect base
1781 // register when interrupted or faulted.
1782 bool Errata602117 = EvenReg == BaseReg &&
1783 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1784 // ARM LDRD/STRD needs consecutive registers.
1785 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1786 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1787
1788 if (!Errata602117 && !NonConsecutiveRegs)
1789 return false;
1790
1791 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1792 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1793 bool EvenDeadKill = isLd ?
1794 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1795 bool EvenUndef = MI->getOperand(0).isUndef();
1796 bool OddDeadKill = isLd ?
1797 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1798 bool OddUndef = MI->getOperand(1).isUndef();
1799 bool BaseKill = BaseOp.isKill();
1800 bool BaseUndef = BaseOp.isUndef();
1801 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1802 "register offset not handled below");
1803 int OffImm = getMemoryOpOffset(*MI);
1804 Register PredReg;
1805 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1806
1807 if (OddRegNum > EvenRegNum && OffImm == 0) {
1808 // Ascending register numbers and no offset. It's safe to change it to a
1809 // ldm or stm.
1810 unsigned NewOpc = (isLd)
1811 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1812 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1813 if (isLd) {
1814 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1815 .addReg(BaseReg, getKillRegState(BaseKill))
1816 .addImm(Pred).addReg(PredReg)
1817 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1818 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1819 .cloneMemRefs(*MI);
1820 ++NumLDRD2LDM;
1821 } else {
1822 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1823 .addReg(BaseReg, getKillRegState(BaseKill))
1824 .addImm(Pred).addReg(PredReg)
1825 .addReg(EvenReg,
1826 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1827 .addReg(OddReg,
1828 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1829 .cloneMemRefs(*MI);
1830 ++NumSTRD2STM;
1831 }
1832 } else {
1833 // Split into two instructions.
1834 unsigned NewOpc = (isLd)
1835 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1836 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1837 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1838 // so adjust and use t2LDRi12 here for that.
1839 unsigned NewOpc2 = (isLd)
1840 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1841 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1842 // If this is a load, make sure the first load does not clobber the base
1843 // register before the second load reads it.
1844 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1845 assert(!TRI->regsOverlap(OddReg, BaseReg));
1846 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1847 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1848 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1849 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1850 MI);
1851 } else {
1852 if (OddReg == EvenReg && EvenDeadKill) {
1853 // If the two source operands are the same, the kill marker is
1854 // probably on the first one. e.g.
1855 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1856 EvenDeadKill = false;
1857 OddDeadKill = true;
1858 }
1859 // Never kill the base register in the first instruction.
1860 if (EvenReg == BaseReg)
1861 EvenDeadKill = false;
1862 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1863 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1864 MI);
1865 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1866 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1867 MI);
1868 }
1869 if (isLd)
1870 ++NumLDRD2LDR;
1871 else
1872 ++NumSTRD2STR;
1873 }
1874
1875 MBBI = MBB.erase(MBBI);
1876 return true;
1877}
1878
1879/// An optimization pass to turn multiple LDR / STR ops of the same base and
1880/// incrementing offset into LDM / STM ops.
1881bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1882  MemOpQueue MemOps;
1883  unsigned CurrBase = 0;
1884  unsigned CurrOpc = ~0u;
1885  ARMCC::CondCodes CurrPred = ARMCC::AL;
1886  unsigned Position = 0;
1887  assert(Candidates.size() == 0);
1888  assert(MergeBaseCandidates.size() == 0);
1889  LiveRegsValid = false;
  // Phase 1: scan the block and collect runs ("chains") of memory ops that
  // share the same base register, opcode and predicate. Each time a chain is
  // broken it is handed to FormCandidates() below.
1890
1892       I = MBBI) {
1893    // The instruction in front of the iterator is the one we look at.
1894    MBBI = std::prev(I);
1895    if (FixInvalidRegPairOp(MBB, MBBI))
1896      continue;
1897    ++Position;
1898
1899    if (isMemoryOp(*MBBI)) {
1900      unsigned Opcode = MBBI->getOpcode();
1901      const MachineOperand &MO = MBBI->getOperand(0);
1902      Register Reg = MO.getReg();
1904      Register PredReg;
1905      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1907      if (CurrBase == 0) {
1908        // Start of a new chain.
1909        CurrBase = Base;
1910        CurrOpc = Opcode;
1911        CurrPred = Pred;
1912        MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1913        continue;
1914      }
1915      // Note: No need to match PredReg in the next if.
1916      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1917        // Watch out for:
1918        //   r4 := ldr [r0, #8]
1919        //   r4 := ldr [r0, #4]
1920        // or
1921        //   r0 := ldr [r0]
1922        // If a load overrides the base register or a register loaded by
1923        // another load in our chain, we cannot take this instruction.
1924        bool Overlap = false;
1925        if (isLoadSingle(Opcode)) {
1926          Overlap = (Base == Reg);
1927          if (!Overlap) {
1928            for (const MemOpQueueEntry &E : MemOps) {
1929              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1930                Overlap = true;
1931                break;
1932              }
1933            }
1934          }
1935        }
1936
1937        if (!Overlap) {
1938          // Check offset and sort memory operation into the current chain.
1939          if (Offset > MemOps.back().Offset) {
1940            MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1941            continue;
1942          } else {
1943            MemOpQueue::iterator MI, ME;
1944            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1945              if (Offset < MI->Offset) {
1946                // Found a place to insert.
1947                break;
1948              }
1949              if (Offset == MI->Offset) {
1950                // Collision, abort.
1951                MI = ME;
1952                break;
1953              }
1954            }
1955            if (MI != MemOps.end()) {
1956              MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1957              continue;
1958            }
1959          }
1960        }
1961      }
1962
1963      // Don't advance the iterator; The op will start a new chain next.
1964      MBBI = I;
1965      --Position;
1966      // Fallthrough to look into existing chain.
1967    } else if (MBBI->isDebugInstr()) {
1968      continue;
1969    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1970               MBBI->getOpcode() == ARM::t2STRDi8) {
1971      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1972      // remember them because we may still be able to merge add/sub into them.
1973      MergeBaseCandidates.push_back(&*MBBI);
1974    }
1975
1976    // If we are here then the chain is broken; Extract candidates for a merge.
1977    if (MemOps.size() > 0) {
1978      FormCandidates(MemOps);
1979      // Reset for the next chain.
1980      CurrBase = 0;
1981      CurrOpc = ~0u;
1982      CurrPred = ARMCC::AL;
1983      MemOps.clear();
1984    }
1985  }
  // Reaching the end of the scan also ends the last open chain.
1986  if (MemOps.size() > 0)
1987    FormCandidates(MemOps);
1988
1989  // Sort candidates so they get processed from end to begin of the basic
1990  // block later; This is necessary for liveness calculation.
1991  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1992    return M0->InsertPos < M1->InsertPos;
1993  };
1994  llvm::sort(Candidates, LessThan);
1995
1996  // Go through list of candidates and merge.
1997  bool Changed = false;
1998  for (const MergeCandidate *Candidate : Candidates) {
1999    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2000      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2001      // Merge preceding/trailing base inc/dec into the merged op.
2002      if (Merged) {
2003        Changed = true;
2004        unsigned Opcode = Merged->getOpcode();
2005        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2006          MergeBaseUpdateLSDouble(*Merged);
2007        else
2008          MergeBaseUpdateLSMultiple(Merged);
2009      } else {
        // Merging failed; still try to fold a base inc/dec into each
        // individual load/store of the candidate.
2010        for (MachineInstr *MI : Candidate->Instrs) {
2011          if (MergeBaseUpdateLoadStore(MI))
2012            Changed = true;
2013        }
2014      }
2015    } else {
2016      assert(Candidate->Instrs.size() == 1);
2017      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2018        Changed = true;
2019    }
2020  }
2021  Candidates.clear();
2022  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2023  for (MachineInstr *MI : MergeBaseCandidates)
2024    MergeBaseUpdateLSDouble(*MI);
2025  MergeBaseCandidates.clear();
2026
2027  return Changed;
2028}
2029
2030/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2031/// into the preceding stack restore so it directly restores the value of LR
2032/// into pc.
2033///   ldmfd sp!, {..., lr}
2034///   bx lr
2035/// or
2036///   ldmfd sp!, {..., lr}
2037///   mov pc, lr
2038/// =>
2039///   ldmfd sp!, {..., pc}
2040bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2041  // Thumb1 LDM doesn't allow high registers.
2042  if (isThumb1) return false;
2043  if (MBB.empty()) return false;
2044
2046  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2047      (MBBI->getOpcode() == ARM::BX_RET ||
2048       MBBI->getOpcode() == ARM::tBX_RET ||
2049       MBBI->getOpcode() == ARM::MOVPCLR)) {
2050    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2051    // Ignore any debug instructions.
2052    while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2053      --PrevI;
2054    MachineInstr &PrevMI = *PrevI;
2055    unsigned Opcode = PrevMI.getOpcode();
2056    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2057        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2058        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list is at the tail of the LDM; the fold only applies
      // when its last (highest) register is LR.
2059      MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2060      if (MO.getReg() != ARM::LR)
2061        return false;
2062      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2063      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2064              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
2065      PrevMI.setDesc(TII->get(NewOpc));
      // Rewrite LR -> PC in the register list and carry over the return's
      // implicit operands before deleting it.
2066      MO.setReg(ARM::PC);
2067      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2068      MBB.erase(MBBI);
2069      return true;
2070    }
2071  }
2072  return false;
2073}
2074
/// Fold a "tMOVr lr, rN" (with rN killed) that immediately precedes a
/// "tBX_RET" into a single "tBX rN": branch through the source register
/// directly instead of copying it into LR first. tBX needs ARMv4T, which the
/// assert below documents.
2075bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2077  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2078      MBBI->getOpcode() != ARM::tBX_RET)
2079    return false;
2080
2082  --Prev;
2083  if (Prev->getOpcode() != ARM::tMOVr ||
2084      !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2085    return false;
2086
  // Find the killed source operand of the tMOVr; it becomes the tBX target.
2087  for (auto Use : Prev->uses())
2088    if (Use.isKill()) {
2089      assert(STI->hasV4TOps());
2090      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2091          .addReg(Use.getReg(), RegState::Kill)
2094      MBB.erase(MBBI);
2095      MBB.erase(Prev);
2096      return true;
2097    }
2098
2099  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2100}
2101
/// Shared post-RA entry point: caches per-function target state, then runs
/// LDM/STM formation, return folding and Thumb1 mov+bx combining over every
/// basic block of the function.
2102bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2103  MF = &Fn;
2104  STI = &Fn.getSubtarget<ARMSubtarget>();
2105  TL = STI->getTargetLowering();
2106  AFI = Fn.getInfo<ARMFunctionInfo>();
2107  TII = STI->getInstrInfo();
2108  TRI = STI->getRegisterInfo();
2109
2110  RegClassInfoValid = false;
2111  isThumb2 = AFI->isThumb2Function();
2112  isThumb1 = AFI->isThumbFunction() && !isThumb2;
2113
2114  bool Modified = false, ModifiedLDMReturn = false;
2115  for (MachineBasicBlock &MBB : Fn) {
2116    Modified |= LoadStoreMultipleOpti(MBB);
    // Folding the return into an LDM needs "ldm ..., {..., pc}" to act as a
    // return, and is skipped when return addresses are signed.
2117    if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2118      ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2119    if (isThumb1)
2120      Modified |= CombineMovBx(MBB);
2121  }
2122  Modified |= ModifiedLDMReturn;
2123
2124  // If we merged a BX instruction into an LDM, we need to re-calculate whether
2125  // LR is restored. This check needs to consider the whole function, not just
2126  // the instruction(s) we changed, because there may be other BX returns which
2127  // still need LR to be restored.
2128  if (ModifiedLDMReturn)
2130
2131  Allocator.DestroyAll();
2132  return Modified;
2133}
2134
2135bool ARMLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
2136 if (skipFunction(MF.getFunction()))
2137 return false;
2138 ARMLoadStoreOpt Impl;
2139 return Impl.runOnMachineFunction(MF);
2140}
2141
2142#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2143 "ARM pre- register allocation load / store optimization pass"
2144
2145namespace {
2146
2147/// Pre- register allocation pass that moves load / stores from consecutive
2148/// locations close to make it more likely they will be combined later.
2149struct ARMPreAllocLoadStoreOpt {
  // Per-function target/analysis state, cached by runOnMachineFunction().
2151  const DataLayout *TD;
2152  const TargetInstrInfo *TII;
2153  const TargetRegisterInfo *TRI;
2154  const ARMSubtarget *STI;
2157  MachineFunction *MF;
2158
  // Entry point; returns true if the function was modified.
2159  bool runOnMachineFunction(MachineFunction &Fn, AliasAnalysis *AA,
2161
2162private:
  // True if (Op0, Op1) can be combined into one LDRD/STRD; fills in the new
  // opcode, registers, encoded offset and predicate on success.
2163  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2164                        unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2165                        Register &BaseReg, int &Offset, Register &PredReg,
2166                        ARMCC::CondCodes &Pred, bool &isT2);
  // Moves same-base loads/stores next to each other within one basic block.
2167  bool RescheduleOps(
2169      unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2171  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2172  bool DistributeIncrements();
2173  bool DistributeIncrements(Register Base);
2174};
2175
// Legacy pass-manager wrapper around ARMPreAllocLoadStoreOpt.
2176struct ARMPreAllocLoadStoreOptLegacy : public MachineFunctionPass {
2177  static char ID;
2178
2179  ARMPreAllocLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
2180
2181  bool runOnMachineFunction(MachineFunction &Fn) override;
2182
2183  StringRef getPassName() const override {
2185  }
2186
  // Declares the analyses (alias analysis, dominator tree) the pass consumes.
2187  void getAnalysisUsage(AnalysisUsage &AU) const override {
2192  }
2193};
2194
2195char ARMPreAllocLoadStoreOptLegacy::ID = 0;
2196
2197} // end anonymous namespace
2198
2199INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2202INITIALIZE_PASS_END(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2204
2205// Limit the number of instructions to be rescheduled.
2206// FIXME: tune this limit, and/or come up with some better heuristics.
// Checked in RescheduleOps(): a run of mergeable loads/stores stops growing
// once it reaches this many instructions.
2207static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2208                                          cl::init(8), cl::Hidden);
2209
/// Pre-RA entry point: caches per-function state, distributes base-register
/// increments, then reschedules loads/stores block by block. Returns true if
/// anything changed.
2210bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn,
2211                                                   AliasAnalysis *AAIn,
2212                                                   MachineDominatorTree *DTIn) {
2214    return false;
2215
2216  AA = AAIn;
2217  DT = DTIn;
2218  TD = &Fn.getDataLayout();
2219  STI = &Fn.getSubtarget<ARMSubtarget>();
2220  TII = STI->getInstrInfo();
2221  TRI = STI->getRegisterInfo();
2222  MRI = &Fn.getRegInfo();
2223  MF = &Fn;
2224
2225  bool Modified = DistributeIncrements();
2226  for (MachineBasicBlock &MFI : Fn)
2227    Modified |= RescheduleLoadStoreInstrs(&MFI);
2228
2229  return Modified;
2230}
2231
2232bool ARMPreAllocLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &Fn) {
2233 if (skipFunction(Fn.getFunction()))
2234 return false;
2235
2236 ARMPreAllocLoadStoreOpt Impl;
2237 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2238 MachineDominatorTree *DT =
2239 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2240 return Impl.runOnMachineFunction(Fn, AA, DT);
2241}
2242
/// Return true if the loads/stores in MemOps can legally and profitably be
/// moved next to each other: no call, terminator, side-effecting instruction,
/// aliasing memory op, or redefinition of Base sits between the first and last
/// of them, and the estimated register-pressure increase stays within bounds.
2243static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2247                                      SmallSet<unsigned, 4> &MemRegs,
2248                                      const TargetRegisterInfo *TRI,
2249                                      AliasAnalysis *AA) {
2250  // Are there stores / loads / calls between them?
2251  SmallSet<unsigned, 4> AddedRegPressure;
2252  while (++I != E) {
2253    if (I->isDebugInstr() || MemOps.count(&*I))
2254      continue;
2255    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2256      return false;
    // Loads may be moved past other loads, but not past aliasing stores;
    // stores may be moved past neither.
2257    if (I->mayStore() || (!isLd && I->mayLoad()))
2258      for (MachineInstr *MemOp : MemOps)
2259        if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2260          return false;
2261    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2262      MachineOperand &MO = I->getOperand(j);
2263      if (!MO.isReg())
2264        continue;
2265      Register Reg = MO.getReg();
2266      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2267        return false;
      // Registers used in the gap that are not part of the move become live
      // across the whole span after the move.
2268      if (Reg != Base && !MemRegs.count(Reg))
2269        AddedRegPressure.insert(Reg);
2270    }
2271  }
2272
2273  // Estimate register pressure increase due to the transformation.
2274  if (MemRegs.size() <= 4)
2275    // Ok if we are moving small number of instructions.
2276    return true;
2277  return AddedRegPressure.size() <= MemRegs.size() * 2;
2278}
2279
/// Check whether the adjacent pair (Op0, Op1) can be combined into a single
/// LDRD/STRD (or t2LDRDi8/t2STRDi8). On success fills in the new opcode, the
/// two data registers, the base register, the encoded offset and the
/// predicate, and sets isT2 for the Thumb2 encodings.
2280bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2281    MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2282    Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2283    Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2284  // Make sure we're allowed to generate LDRD/STRD.
2285  if (!STI->hasV5TEOps())
2286    return false;
2287
2288  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2289  unsigned Scale = 1;
2290  unsigned Opcode = Op0->getOpcode();
2291  if (Opcode == ARM::LDRi12) {
2292    NewOpc = ARM::LDRD;
2293  } else if (Opcode == ARM::STRi12) {
2294    NewOpc = ARM::STRD;
2295  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2296    NewOpc = ARM::t2LDRDi8;
2297    Scale = 4;
2298    isT2 = true;
2299  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2300    NewOpc = ARM::t2STRDi8;
2301    Scale = 4;
2302    isT2 = true;
2303  } else {
2304    return false;
2305  }
2306
2307  // Make sure the base address satisfies i64 ld / st alignment requirement.
2308  // At the moment, we ignore the memoryoperand's value.
2309  // If we want to use AliasAnalysis, we should check it accordingly.
2310  if (!Op0->hasOneMemOperand() ||
2311      (*Op0->memoperands_begin())->isVolatile() ||
2312      (*Op0->memoperands_begin())->isAtomic())
2313    return false;
2314
2315  Align Alignment = (*Op0->memoperands_begin())->getAlign();
2316  Align ReqAlign = STI->getDualLoadStoreAlignment();
2317  if (Alignment < ReqAlign)
2318    return false;
2319
2320  // Then make sure the immediate offset fits.
2321  int OffImm = getMemoryOpOffset(*Op0);
2322  if (isT2) {
    // Thumb2 uses a signed immediate that must be a multiple of Scale.
2323    int Limit = (1 << 8) * Scale;
2324    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2325      return false;
2326    Offset = OffImm;
2327  } else {
    // ARM addrmode3 carries the sign separately, so negate into a magnitude.
2329    if (OffImm < 0) {
2331      OffImm = - OffImm;
2332    }
2333    int Limit = (1 << 8) * Scale;
2334    if (OffImm >= Limit || (OffImm & (Scale-1)))
2335      return false;
2336    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2337  }
2338  FirstReg = Op0->getOperand(0).getReg();
2339  SecondReg = Op1->getOperand(0).getReg();
  // LDRD/STRD needs two distinct data registers.
2340  if (FirstReg == SecondReg)
2341    return false;
2342  BaseReg = Op0->getOperand(1).getReg();
2343  Pred = getInstrPredicate(*Op0, PredReg);
2344  dl = Op0->getDebugLoc();
2345  return true;
2346}
2347
/// Move the loads/stores in Ops (all sharing base register Base) next to each
/// other so the post-RA pass can merge them, forming an LDRD/STRD directly
/// when exactly two ops qualify. Moved load results are recorded in
/// RegisterMap so their DBG_VALUEs can be rescheduled by the caller.
2348bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2349    MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2350    bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2351    SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2352  bool RetVal = false;
2353
2354  // Sort by offset (in reverse order).
2355  llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2356    int LOffset = getMemoryOpOffset(*LHS);
2357    int ROffset = getMemoryOpOffset(*RHS);
2358    assert(LHS == RHS || LOffset != ROffset);
2359    return LOffset > ROffset;
2360  });
2361
2362  // The loads / stores of the same base are in order. Scan them from first to
2363  // last and check for the following:
2364  // 1. Any def of base.
2365  // 2. Any gaps.
2366  while (Ops.size() > 1) {
2367    unsigned FirstLoc = ~0U;
2368    unsigned LastLoc = 0;
2369    MachineInstr *FirstOp = nullptr;
2370    MachineInstr *LastOp = nullptr;
2371    int LastOffset = 0;
2372    unsigned LastOpcode = 0;
2373    unsigned LastBytes = 0;
2374    unsigned NumMove = 0;
    // Collect the longest run (from the lowest offset upwards) of same-kind
    // ops with contiguous offsets, tracking the first/last program locations.
2375    for (MachineInstr *Op : llvm::reverse(Ops)) {
2376      // Make sure each operation has the same kind.
2377      unsigned LSMOpcode
2378        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2379      if (LastOpcode && LSMOpcode != LastOpcode)
2380        break;
2381
2382      // Check that we have a continuous set of offsets.
2383      int Offset = getMemoryOpOffset(*Op);
2384      unsigned Bytes = getLSMultipleTransferSize(Op);
2385      if (LastBytes) {
2386        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2387          break;
2388      }
2389
2390      // Don't try to reschedule too many instructions.
2391      if (NumMove == InstReorderLimit)
2392        break;
2393
2394      // Found a mergeable instruction; save information about it.
2395      ++NumMove;
2396      LastOffset = Offset;
2397      LastBytes = Bytes;
2398      LastOpcode = LSMOpcode;
2399
2400      unsigned Loc = MI2LocMap[Op];
2401      if (Loc <= FirstLoc) {
2402        FirstLoc = Loc;
2403        FirstOp = Op;
2404      }
2405      if (Loc >= LastLoc) {
2406        LastLoc = Loc;
2407        LastOp = Op;
2408      }
2409    }
2410
2411    if (NumMove <= 1)
2412      Ops.pop_back();
2413    else {
2414      SmallPtrSet<MachineInstr*, 4> MemOps;
2415      SmallSet<unsigned, 4> MemRegs;
2416      for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2417        MemOps.insert(Ops[i]);
2418        MemRegs.insert(Ops[i]->getOperand(0).getReg());
2419      }
2420
2421      // Be conservative, if the instructions are too far apart, don't
2422      // move them. We want to limit the increase of register pressure.
2423      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2424      if (DoMove)
2425        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2426                                           MemOps, MemRegs, TRI, AA);
2427      if (!DoMove) {
2428        for (unsigned i = 0; i != NumMove; ++i)
2429          Ops.pop_back();
2430      } else {
2431        // This is the new location for the loads / stores.
2432        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2433        while (InsertPos != MBB->end() &&
2434               (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2435          ++InsertPos;
2436
2437        // If we are moving a pair of loads / stores, see if it makes sense
2438        // to try to allocate a pair of registers that can form register pairs.
2439        MachineInstr *Op0 = Ops.back();
2440        MachineInstr *Op1 = Ops[Ops.size()-2];
2441        Register FirstReg, SecondReg;
2442        Register BaseReg, PredReg;
2444        bool isT2 = false;
2445        unsigned NewOpc = 0;
2446        int Offset = 0;
2447        DebugLoc dl;
2448        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2449                                             FirstReg, SecondReg, BaseReg,
2450                                             Offset, PredReg, Pred, isT2)) {
2451          Ops.pop_back();
2452          Ops.pop_back();
2453
2454          const MCInstrDesc &MCID = TII->get(NewOpc);
2455          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
2456          MRI->constrainRegClass(FirstReg, TRC);
2457          MRI->constrainRegClass(SecondReg, TRC);
2458
2459          // Form the pair instruction.
2460          if (isLd) {
2461            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2462                                          .addReg(FirstReg, RegState::Define)
2463                                          .addReg(SecondReg, RegState::Define)
2464                                          .addReg(BaseReg);
2465            // FIXME: We're converting from LDRi12 to an insn that still
2466            // uses addrmode2, so we need an explicit offset reg. It should
2467            // always by reg0 since we're transforming LDRi12s.
2468            if (!isT2)
2469              MIB.addReg(0);
2470            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2471            MIB.cloneMergedMemRefs({Op0, Op1});
2472            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2473            ++NumLDRDFormed;
2474          } else {
2475            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2476                                          .addReg(FirstReg)
2477                                          .addReg(SecondReg)
2478                                          .addReg(BaseReg);
2479            // FIXME: We're converting from LDRi12 to an insn that still
2480            // uses addrmode2, so we need an explicit offset reg. It should
2481            // always by reg0 since we're transforming STRi12s.
2482            if (!isT2)
2483              MIB.addReg(0);
2484            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2485            MIB.cloneMergedMemRefs({Op0, Op1});
2486            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2487            ++NumSTRDFormed;
2488          }
2489          MBB->erase(Op0);
2490          MBB->erase(Op1);
2491
2492          if (!isT2) {
2493            // Add register allocation hints to form register pairs.
2494            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2495            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2496          }
2497        } else {
2498          for (unsigned i = 0; i != NumMove; ++i) {
2499            MachineInstr *Op = Ops.pop_back_val();
2500            if (isLd) {
2501              // Populate RegisterMap with all Registers defined by loads.
2502              Register Reg = Op->getOperand(0).getReg();
              // Key-only insertion; DBG_VALUE users are added later.
2503              RegisterMap[Reg];
2504            }
2505
2506            MBB->splice(InsertPos, MBB, Op);
2507          }
2508        }
2509
2510        NumLdStMoved += NumMove;
2511        RetVal = true;
2512      }
2513    }
2514  }
2515
2516  return RetVal;
2517}
2518
// Apply Fn to every register operand of a debug-value instruction: operand 0
// for a plain DBG_VALUE, and operands from index 2 onwards (the location
// operands) for a DBG_VALUE_LIST.
2520                                 std::function<void(MachineOperand &)> Fn) {
2521  if (MI->isNonListDebugValue()) {
2522    auto &Op = MI->getOperand(0);
2523    if (Op.isReg())
2524      Fn(Op);
2525  } else {
2526    for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2527      auto &Op = MI->getOperand(I);
2528      if (Op.isReg())
2529        Fn(Op);
2530    }
2531  }
2532}
2533
2534// Update the RegisterMap with the instruction that was moved because a
2535// DBG_VALUE_LIST may need to be moved again.
2538    MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2539
  // For each register the DBG_VALUE_LIST refers to, swap the stale pointer
  // (InstrToReplace) for the moved instruction in that register's bucket.
2540  forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2541    auto RegIt = RegisterMap.find(Op.getReg());
2542    if (RegIt == RegisterMap.end())
2543      return;
2544    auto &InstrVec = RegIt->getSecond();
2545    llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2546  });
2547}
2548
// Build the DebugVariable key (variable, expression, inlined-at scope) that a
// DBG_VALUE / DBG_VALUE_LIST instruction describes; used as a map key below.
2550  auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2551                              MI->getDebugLoc()->getInlinedAt());
2552  return DbgVar;
2553}
2554
/// Reschedule same-base loads/stores within one basic block (between
/// call/terminator barriers), then re-sink any DBG_VALUE / DBG_VALUE_LIST
/// instructions whose loads were moved past them.
2555bool
2556ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2557  bool RetVal = false;
2558
2559  DenseMap<MachineInstr *, unsigned> MI2LocMap;
2560  using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2561  using BaseVec = SmallVector<unsigned, 4>;
2562  Base2InstMap Base2LdsMap;
2563  Base2InstMap Base2StsMap;
2564  BaseVec LdBases;
2565  BaseVec StBases;
2566  // This map is used to track the relationship between the virtual
2567  // register that is the result of a load that is moved and the DBG_VALUE
2568  // MachineInstr pointer that uses that virtual register.
2569  SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2570
2571  unsigned Loc = 0;
2574  while (MBBI != E) {
    // Group unpredicated loads/stores by base register up to the next
    // barrier (call/terminator) or a duplicate base+offset.
2575    for (; MBBI != E; ++MBBI) {
2576      MachineInstr &MI = *MBBI;
2577      if (MI.isCall() || MI.isTerminator()) {
2578        // Stop at barriers.
2579        ++MBBI;
2580        break;
2581      }
2582
2583      if (!MI.isDebugInstr())
2584        MI2LocMap[&MI] = ++Loc;
2585
2586      if (!isMemoryOp(MI))
2587        continue;
2588      Register PredReg;
2589      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2590        continue;
2591
2592      int Opc = MI.getOpcode();
2593      bool isLd = isLoadSingle(Opc);
2594      Register Base = MI.getOperand(1).getReg();
2596      bool StopHere = false;
2597      auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2598        auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2599        if (Inserted) {
2600          BI->second.push_back(&MI);
2601          Bases.push_back(Base);
2602          return;
2603        }
2604        for (const MachineInstr *MI : BI->second) {
2605          if (Offset == getMemoryOpOffset(*MI)) {
2606            StopHere = true;
2607            break;
2608          }
2609        }
2610        if (!StopHere)
2611          BI->second.push_back(&MI);
2612      };
2613
2614      if (isLd)
2615        FindBases(Base2LdsMap, LdBases);
2616      else
2617        FindBases(Base2StsMap, StBases);
2618
2619      if (StopHere) {
2620        // Found a duplicate (a base+offset combination that's seen earlier).
2621        // Backtrack.
2622        --Loc;
2623        break;
2624      }
2625    }
2626
2627    // Re-schedule loads.
2628    for (unsigned Base : LdBases) {
2629      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2630      if (Lds.size() > 1)
2631        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2632    }
2633
2634    // Re-schedule stores.
2635    for (unsigned Base : StBases) {
2636      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2637      if (Sts.size() > 1)
2638        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2639    }
2640
2641    if (MBBI != E) {
2642      Base2LdsMap.clear();
2643      Base2StsMap.clear();
2644      LdBases.clear();
2645      StBases.clear();
2646    }
2647  }
2648
2649  // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2650  // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2651  // use-before-def, resulting in a loss of debug info.
2652
2653  // Example:
2654  // Before the Pre Register Allocation Load Store Pass
2655  // inst_a
2656  // %2 = ld ...
2657  // inst_b
2658  // DBG_VALUE %2, "x", ...
2659  // %3 = ld ...
2660
2661  // After the Pass:
2662  // inst_a
2663  // inst_b
2664  // DBG_VALUE %2, "x", ...
2665  // %2 = ld ...
2666  // %3 = ld ...
2667
2668  // The code below addresses this by moving the DBG_VALUE to the position
2669  // immediately after the load.
2670
2671  // Example:
2672  // After the code below:
2673  // inst_a
2674  // inst_b
2675  // %2 = ld ...
2676  // DBG_VALUE %2, "x", ...
2677  // %3 = ld ...
2678
2679  // The algorithm works in two phases: First RescheduleOps() populates the
2680  // RegisterMap with registers that were moved as keys, there is no value
2681  // inserted. In the next phase, every MachineInstr in a basic block is
2682  // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2683  // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2684  // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2685  // describes debug information for a variable that already exists in the
2686  // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2687  // be set to undef. If the current MachineInstr is a load that was moved,
2688  // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2689  // the load.
2690
2691  // To illustrate the above algorithm visually let's take this example.
2692
2693  // Before the Pre Register Allocation Load Store Pass:
2694  // %2 = ld ...
2695  // DBG_VALUE %2, A, .... # X
2696  // DBG_VALUE 0, A, ... # Y
2697  // %3 = ld ...
2698  // DBG_VALUE %3, A, ..., # Z
2699  // %4 = ld ...
2700
2701  // After Pre Register Allocation Load Store Pass:
2702  // DBG_VALUE %2, A, .... # X
2703  // DBG_VALUE 0, A, ... # Y
2704  // DBG_VALUE %3, A, ..., # Z
2705  // %2 = ld ...
2706  // %3 = ld ...
2707  // %4 = ld ...
2708
2709  // The algorithm below does the following:
2710
2711  // In the beginning, the RegisterMap will have been populated with the virtual
2712  // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2713  // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2714  // InstrMap {}
2715  // -> DBG_VALUE %2, A, .... # X
2716  // DBG_VALUE 0, A, ... # Y
2717  // DBG_VALUE %3, A, ..., # Z
2718  // %2 = ld ...
2719  // %3 = ld ...
2720  // %4 = ld ...
2721
2722  // After the first DBG_VALUE (denoted with an X) is processed, the
2723  // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2724  // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2725  // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2726  // DBG_VALUE %2, A, .... # X
2727  // -> DBG_VALUE 0, A, ... # Y
2728  // DBG_VALUE %3, A, ..., # Z
2729  // %2 = ld ...
2730  // %3 = ld ...
2731  // %4 = ld ...
2732
2733  // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2734  // to now hold Y for A and the RegisterMap is also updated to remove X from
2735  // %2, this is because both X and Y describe the same debug variable A. X is
2736  // also updated to have a $noreg as the first operand.
2737  // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2738  // InstrMap = {X-> 2}
2739  // DBG_VALUE $noreg, A, .... # X
2740  // DBG_VALUE 0, A, ... # Y
2741  // -> DBG_VALUE %3, A, ..., # Z
2742  // %2 = ld ...
2743  // %3 = ld ...
2744  // %4 = ld ...
2745
2746  // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2747  // hold Z for A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2748  // is updated to have Z mapped to %3. This is again because Z describes the
2749  // debug variable A, Y is not updated to have $noreg as first operand because
2750  // its first operand is an immediate, not a register.
2751  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2752  // InstrMap = {X -> 2, Z -> 3}
2753  // DBG_VALUE $noreg, A, .... # X
2754  // DBG_VALUE 0, A, ... # Y
2755  // DBG_VALUE %3, A, ..., # Z
2756  // -> %2 = ld ...
2757  // %3 = ld ...
2758  // %4 = ld ...
2759
2760  // Nothing happens here since the RegisterMap for %2 contains no value.
2761  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2762  // InstrMap = {X -> 2, Z -> 3}
2763  // DBG_VALUE $noreg, A, .... # X
2764  // DBG_VALUE 0, A, ... # Y
2765  // DBG_VALUE %3, A, ..., # Z
2766  // %2 = ld ...
2767  // -> %3 = ld ...
2768  // %4 = ld ...
2769
2770  // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2771  // pointer Z is copied to come after the load for %3 and the old Z's first
2772  // operand is changed to $noreg the Basic Block iterator is moved to after the
2773  // DBG_VALUE Z's new position.
2774  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2775  // InstrMap = {X -> 2, Z -> 3}
2776  // DBG_VALUE $noreg, A, .... # X
2777  // DBG_VALUE 0, A, ... # Y
2778  // DBG_VALUE $noreg, A, ..., # Old Z
2779  // %2 = ld ...
2780  // %3 = ld ...
2781  // DBG_VALUE %3, A, ..., # Z
2782  // -> %4 = ld ...
2783
2784  // Nothing happens for %4 and the algorithm exits having processed the entire
2785  // Basic Block.
2786  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2787  // InstrMap = {X -> 2, Z -> 3}
2788  // DBG_VALUE $noreg, A, .... # X
2789  // DBG_VALUE 0, A, ... # Y
2790  // DBG_VALUE $noreg, A, ..., # Old Z
2791  // %2 = ld ...
2792  // %3 = ld ...
2793  // DBG_VALUE %3, A, ..., # Z
2794  // %4 = ld ...
2795
2796  // This map is used to track the relationship between
2797  // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2798  // debug information for that Debug Variable.
2799  SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2800  // This map is used to track the relationship between a DBG_VALUE or
2801  // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2802  SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2803  for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2804    MachineInstr &MI = *MBBI;
2805
2806    auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2807      auto RegIt = RegisterMap.find(Reg);
2808      if (RegIt == RegisterMap.end())
2809        return;
2810      auto &InstrVec = RegIt->getSecond();
2811      InstrVec.push_back(&MI);
2812      InstrMap[&MI].push_back(Reg);
2813    };
2814
2815    if (MI.isDebugValue()) {
2816      assert(MI.getDebugVariable() &&
2817             "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2818
2820      // If the first operand is a register and it exists in the RegisterMap, we
2821      // know this is a DBG_VALUE that uses the result of a load that was moved,
2822      // and is therefore a candidate to also be moved, add it to the
2823      // RegisterMap and InstrMap.
2824      forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2825        PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2826      });
2827
2828      // If the current DBG_VALUE describes the same variable as one of the
2829      // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2830      // undef. Moving one DBG_VALUE past another would result in the variable's
2831      // value going back in time when stepping through the block in the
2832      // debugger.
2833      auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2834      if (InstrIt != DbgValueSinkCandidates.end()) {
2835        auto *Instr = InstrIt->getSecond();
2836        auto RegIt = InstrMap.find(Instr);
2837        if (RegIt != InstrMap.end()) {
2838          const auto &RegVec = RegIt->getSecond();
2839          // For every Register in the RegVec, remove the MachineInstr in the
2840          // RegisterMap that describes the DbgVar.
2841          for (auto &Reg : RegVec) {
2842            auto RegIt = RegisterMap.find(Reg);
2843            if (RegIt == RegisterMap.end())
2844              continue;
2845            auto &InstrVec = RegIt->getSecond();
2846            auto IsDbgVar = [&](MachineInstr *I) -> bool {
2848              return Var == DbgVar;
2849            };
2850
2851            llvm::erase_if(InstrVec, IsDbgVar);
2852          }
2854              [&](MachineOperand &Op) { Op.setReg(0); });
2855        }
2856      }
2857      DbgValueSinkCandidates[DbgVar] = &MI;
2858    } else {
2859      // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2860      // then move that DBG_VALUE to below the load.
2861      auto Opc = MI.getOpcode();
2862      if (!isLoadSingle(Opc))
2863        continue;
2864      auto Reg = MI.getOperand(0).getReg();
2865      auto RegIt = RegisterMap.find(Reg);
2866      if (RegIt == RegisterMap.end())
2867        continue;
2868      auto &DbgInstrVec = RegIt->getSecond();
2869      if (!DbgInstrVec.size())
2870        continue;
2871      for (auto *DbgInstr : DbgInstrVec) {
2872        MachineBasicBlock::iterator InsertPos = std::next(MBBI);
        // Clone below the load; the original is undef'd rather than erased.
2873        auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2874        MBB->insert(InsertPos, ClonedMI);
2875        MBBI++;
2876        // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2877        // that was moved.
2878        auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2879        // Erase DbgVar from DbgValueSinkCandidates if still present. If the
2880        // instruction is a DBG_VALUE_LIST, it may have already been erased from
2881        // DbgValueSinkCandidates.
2882        DbgValueSinkCandidates.erase(DbgVar);
2883        // Zero out original dbg instr
2884        forEachDbgRegOperand(DbgInstr,
2885                             [&](MachineOperand &Op) { Op.setReg(0); });
2886        // Update RegisterMap with ClonedMI because it might have to be moved
2887        // again.
2888        if (DbgInstr->isDebugValueList())
2889          updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2890                                                    DbgInstr);
2891      }
2892    }
2893  }
2894  return RetVal;
2895}
2896
2897// Get the Base register operand index from the memory access MachineInst if we
2898// should attempt to distribute postinc on it. Return -1 if not of a valid
2899// instruction type. If it returns an index, it is assumed that instruction is a
2900// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
2902 switch (MI.getOpcode()) {
2903 case ARM::MVE_VLDRBS16:
2904 case ARM::MVE_VLDRBS32:
2905 case ARM::MVE_VLDRBU16:
2906 case ARM::MVE_VLDRBU32:
2907 case ARM::MVE_VLDRHS32:
2908 case ARM::MVE_VLDRHU32:
2909 case ARM::MVE_VLDRBU8:
2910 case ARM::MVE_VLDRHU16:
2911 case ARM::MVE_VLDRWU32:
2912 case ARM::MVE_VSTRB16:
2913 case ARM::MVE_VSTRB32:
2914 case ARM::MVE_VSTRH32:
2915 case ARM::MVE_VSTRBU8:
2916 case ARM::MVE_VSTRHU16:
2917 case ARM::MVE_VSTRWU32:
2918 case ARM::t2LDRHi8:
2919 case ARM::t2LDRHi12:
2920 case ARM::t2LDRSHi8:
2921 case ARM::t2LDRSHi12:
2922 case ARM::t2LDRBi8:
2923 case ARM::t2LDRBi12:
2924 case ARM::t2LDRSBi8:
2925 case ARM::t2LDRSBi12:
2926 case ARM::t2STRBi8:
2927 case ARM::t2STRBi12:
2928 case ARM::t2STRHi8:
2929 case ARM::t2STRHi12:
2930 return 1;
2931 case ARM::MVE_VLDRBS16_post:
2932 case ARM::MVE_VLDRBS32_post:
2933 case ARM::MVE_VLDRBU16_post:
2934 case ARM::MVE_VLDRBU32_post:
2935 case ARM::MVE_VLDRHS32_post:
2936 case ARM::MVE_VLDRHU32_post:
2937 case ARM::MVE_VLDRBU8_post:
2938 case ARM::MVE_VLDRHU16_post:
2939 case ARM::MVE_VLDRWU32_post:
2940 case ARM::MVE_VSTRB16_post:
2941 case ARM::MVE_VSTRB32_post:
2942 case ARM::MVE_VSTRH32_post:
2943 case ARM::MVE_VSTRBU8_post:
2944 case ARM::MVE_VSTRHU16_post:
2945 case ARM::MVE_VSTRWU32_post:
2946 case ARM::MVE_VLDRBS16_pre:
2947 case ARM::MVE_VLDRBS32_pre:
2948 case ARM::MVE_VLDRBU16_pre:
2949 case ARM::MVE_VLDRBU32_pre:
2950 case ARM::MVE_VLDRHS32_pre:
2951 case ARM::MVE_VLDRHU32_pre:
2952 case ARM::MVE_VLDRBU8_pre:
2953 case ARM::MVE_VLDRHU16_pre:
2954 case ARM::MVE_VLDRWU32_pre:
2955 case ARM::MVE_VSTRB16_pre:
2956 case ARM::MVE_VSTRB32_pre:
2957 case ARM::MVE_VSTRH32_pre:
2958 case ARM::MVE_VSTRBU8_pre:
2959 case ARM::MVE_VSTRHU16_pre:
2960 case ARM::MVE_VSTRWU32_pre:
2961 return 2;
2962 }
2963 return -1;
2964}
2965
2967 switch (MI.getOpcode()) {
2968 case ARM::MVE_VLDRBS16_post:
2969 case ARM::MVE_VLDRBS32_post:
2970 case ARM::MVE_VLDRBU16_post:
2971 case ARM::MVE_VLDRBU32_post:
2972 case ARM::MVE_VLDRHS32_post:
2973 case ARM::MVE_VLDRHU32_post:
2974 case ARM::MVE_VLDRBU8_post:
2975 case ARM::MVE_VLDRHU16_post:
2976 case ARM::MVE_VLDRWU32_post:
2977 case ARM::MVE_VSTRB16_post:
2978 case ARM::MVE_VSTRB32_post:
2979 case ARM::MVE_VSTRH32_post:
2980 case ARM::MVE_VSTRBU8_post:
2981 case ARM::MVE_VSTRHU16_post:
2982 case ARM::MVE_VSTRWU32_post:
2983 return true;
2984 }
2985 return false;
2986}
2987
2989 switch (MI.getOpcode()) {
2990 case ARM::MVE_VLDRBS16_pre:
2991 case ARM::MVE_VLDRBS32_pre:
2992 case ARM::MVE_VLDRBU16_pre:
2993 case ARM::MVE_VLDRBU32_pre:
2994 case ARM::MVE_VLDRHS32_pre:
2995 case ARM::MVE_VLDRHU32_pre:
2996 case ARM::MVE_VLDRBU8_pre:
2997 case ARM::MVE_VLDRHU16_pre:
2998 case ARM::MVE_VLDRWU32_pre:
2999 case ARM::MVE_VSTRB16_pre:
3000 case ARM::MVE_VSTRB32_pre:
3001 case ARM::MVE_VSTRH32_pre:
3002 case ARM::MVE_VSTRBU8_pre:
3003 case ARM::MVE_VSTRHU16_pre:
3004 case ARM::MVE_VSTRWU32_pre:
3005 return true;
3006 }
3007 return false;
3008}
3009
3010// Given a memory access Opcode, check that the give Imm would be a valid Offset
3011// for this instruction (same as isLegalAddressImm), Or if the instruction
3012// could be easily converted to one where that was valid. For example converting
3013// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
3014// AdjustBaseAndOffset below.
3015static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm,
3016 const TargetInstrInfo *TII,
3017 int &CodesizeEstimate) {
3018 if (isLegalAddressImm(Opcode, Imm, TII))
3019 return true;
3020
3021 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
3022 const MCInstrDesc &Desc = TII->get(Opcode);
3023 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
3024 switch (AddrMode) {
3026 CodesizeEstimate += 1;
3027 return Imm < 0 && -Imm < ((1 << 8) * 1);
3028 }
3029 return false;
3030}
3031
3032// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
3033// by -Offset. This can either happen in-place or be a replacement as MI is
3034// converted to another instruction type.
3036 int Offset, const TargetInstrInfo *TII,
3037 const TargetRegisterInfo *TRI) {
3038 // Set the Base reg
3039 unsigned BaseOp = getBaseOperandIndex(*MI);
3040 MI->getOperand(BaseOp).setReg(NewBaseReg);
3041 // and constrain the reg class to that required by the instruction.
3042 MachineFunction *MF = MI->getMF();
3043 MachineRegisterInfo &MRI = MF->getRegInfo();
3044 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3045 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
3046 MRI.constrainRegClass(NewBaseReg, TRC);
3047
3048 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3049 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3050 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3051 else {
3052 unsigned ConvOpcode;
3053 switch (MI->getOpcode()) {
3054 case ARM::t2LDRHi12:
3055 ConvOpcode = ARM::t2LDRHi8;
3056 break;
3057 case ARM::t2LDRSHi12:
3058 ConvOpcode = ARM::t2LDRSHi8;
3059 break;
3060 case ARM::t2LDRBi12:
3061 ConvOpcode = ARM::t2LDRBi8;
3062 break;
3063 case ARM::t2LDRSBi12:
3064 ConvOpcode = ARM::t2LDRSBi8;
3065 break;
3066 case ARM::t2STRHi12:
3067 ConvOpcode = ARM::t2STRHi8;
3068 break;
3069 case ARM::t2STRBi12:
3070 ConvOpcode = ARM::t2STRBi8;
3071 break;
3072 default:
3073 llvm_unreachable("Unhandled convertible opcode");
3074 }
3075 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3076 "Illegal Address Immediate after convert!");
3077
3078 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3079 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3080 .add(MI->getOperand(0))
3081 .add(MI->getOperand(1))
3082 .addImm(OldOffset - Offset)
3083 .add(MI->getOperand(3))
3084 .add(MI->getOperand(4))
3085 .cloneMemRefs(*MI);
3086 MI->eraseFromParent();
3087 }
3088}
3089
3091 Register NewReg,
3092 const TargetInstrInfo *TII,
3093 const TargetRegisterInfo *TRI) {
3094 MachineFunction *MF = MI->getMF();
3095 MachineRegisterInfo &MRI = MF->getRegInfo();
3096
3097 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3098 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3099
3100 const MCInstrDesc &MCID = TII->get(NewOpcode);
3101 // Constrain the def register class
3102 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
3103 MRI.constrainRegClass(NewReg, TRC);
3104 // And do the same for the base operand
3105 TRC = TII->getRegClass(MCID, 2);
3106 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3107
3108 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3109 switch (AddrMode) {
3113 // Any MVE load/store
3114 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3115 .addReg(NewReg, RegState::Define)
3116 .add(MI->getOperand(0))
3117 .add(MI->getOperand(1))
3118 .addImm(Offset)
3119 .add(MI->getOperand(3))
3120 .add(MI->getOperand(4))
3121 .add(MI->getOperand(5))
3122 .cloneMemRefs(*MI);
3124 if (MI->mayLoad()) {
3125 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3126 .add(MI->getOperand(0))
3127 .addReg(NewReg, RegState::Define)
3128 .add(MI->getOperand(1))
3129 .addImm(Offset)
3130 .add(MI->getOperand(3))
3131 .add(MI->getOperand(4))
3132 .cloneMemRefs(*MI);
3133 } else {
3134 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3135 .addReg(NewReg, RegState::Define)
3136 .add(MI->getOperand(0))
3137 .add(MI->getOperand(1))
3138 .addImm(Offset)
3139 .add(MI->getOperand(3))
3140 .add(MI->getOperand(4))
3141 .cloneMemRefs(*MI);
3142 }
3143 default:
3144 llvm_unreachable("Unhandled createPostIncLoadStore");
3145 }
3146}
3147
3148// Given a Base Register, optimise the load/store uses to attempt to create more
3149// post-inc accesses and less register moves. We do this by taking zero offset
3150// loads/stores with an add, and convert them to a postinc load/store of the
3151// same type. Any subsequent accesses will be adjusted to use and account for
3152// the post-inc value.
3153// For example:
3154// LDR #0 LDR_POSTINC #16
3155// LDR #4 LDR #-12
3156// LDR #8 LDR #-8
3157// LDR #12 LDR #-4
3158// ADD #16
3159//
3160// At the same time if we do not find an increment but do find an existing
3161// pre/post inc instruction, we can still adjust the offsets of subsequent
3162// instructions to save the register move that would otherwise be needed for the
3163// in-place increment.
3164bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3165 // We are looking for:
3166 // One zero offset load/store that can become postinc
3167 MachineInstr *BaseAccess = nullptr;
3168 MachineInstr *PrePostInc = nullptr;
3169 // An increment that can be folded in
3170 MachineInstr *Increment = nullptr;
3171 // Other accesses after BaseAccess that will need to be updated to use the
3172 // postinc value.
3173 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3174 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3175 if (!Increment && getAddSubImmediate(Use) != 0) {
3176 Increment = &Use;
3177 continue;
3178 }
3179
3180 int BaseOp = getBaseOperandIndex(Use);
3181 if (BaseOp == -1)
3182 return false;
3183
3184 if (!Use.getOperand(BaseOp).isReg() ||
3185 Use.getOperand(BaseOp).getReg() != Base)
3186 return false;
3187 if (isPreIndex(Use) || isPostIndex(Use))
3188 PrePostInc = &Use;
3189 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3190 BaseAccess = &Use;
3191 else
3192 OtherAccesses.insert(&Use);
3193 }
3194
3195 int IncrementOffset;
3196 Register NewBaseReg;
3197 if (BaseAccess && Increment) {
3198 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3199 return false;
3200 Register PredReg;
3201 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3203 return false;
3204
3205 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3206 << Base.virtRegIndex() << "\n");
3207
3208 // Make sure that Increment has no uses before BaseAccess that are not PHI
3209 // uses.
3210 for (MachineInstr &Use :
3211 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3212 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3213 !DT->dominates(BaseAccess, &Use))) {
3214 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3215 return false;
3216 }
3217 }
3218
3219 // Make sure that Increment can be folded into Base
3220 IncrementOffset = getAddSubImmediate(*Increment);
3221 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3222 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3223 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3224 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3225 return false;
3226 }
3227 }
3228 else if (PrePostInc) {
3229 // If we already have a pre/post index load/store then set BaseAccess,
3230 // IncrementOffset and NewBaseReg to the values it already produces,
3231 // allowing us to update and subsequent uses of BaseOp reg with the
3232 // incremented value.
3233 if (Increment)
3234 return false;
3235
3236 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3237 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3238 int BaseOp = getBaseOperandIndex(*PrePostInc);
3239 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3240 BaseAccess = PrePostInc;
3241 NewBaseReg = PrePostInc->getOperand(0).getReg();
3242 }
3243 else
3244 return false;
3245
3246 // And make sure that the negative value of increment can be added to all
3247 // other offsets after the BaseAccess. We rely on either
3248 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3249 // to keep things simple.
3250 // This also adds a simple codesize metric, to detect if an instruction (like
3251 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3252 // cannot because it is converted to something else (t2LDRBi8). We start this
3253 // at -1 for the gain from removing the increment.
3254 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3255 int CodesizeEstimate = -1;
3256 for (auto *Use : OtherAccesses) {
3257 if (DT->dominates(BaseAccess, Use)) {
3258 SuccessorAccesses.insert(Use);
3259 unsigned BaseOp = getBaseOperandIndex(*Use);
3260 if (!isLegalOrConvertibleAddressImm(Use->getOpcode(),
3261 Use->getOperand(BaseOp + 1).getImm() -
3262 IncrementOffset,
3263 TII, CodesizeEstimate)) {
3264 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3265 return false;
3266 }
3267 } else if (!DT->dominates(Use, BaseAccess)) {
3268 LLVM_DEBUG(
3269 dbgs() << " Unknown dominance relation between Base and Use\n");
3270 return false;
3271 }
3272 }
3273 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3274 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3275 return false;
3276 }
3277
3278 if (!PrePostInc) {
3279 // Replace BaseAccess with a post inc
3280 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3281 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3282 NewBaseReg = Increment->getOperand(0).getReg();
3283 MachineInstr *BaseAccessPost =
3284 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3285 BaseAccess->eraseFromParent();
3286 Increment->eraseFromParent();
3287 (void)BaseAccessPost;
3288 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3289 }
3290
3291 for (auto *Use : SuccessorAccesses) {
3292 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3293 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3294 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3295 }
3296
3297 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3298 // remain.
3299 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3300 Op.setIsKill(false);
3301 return true;
3302}
3303
3304bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3305 bool Changed = false;
3306 SmallSetVector<Register, 4> Visited;
3307 for (auto &MBB : *MF) {
3308 for (auto &MI : MBB) {
3309 int BaseOp = getBaseOperandIndex(MI);
3310 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3311 continue;
3312
3313 Register Base = MI.getOperand(BaseOp).getReg();
3314 if (!Base.isVirtual())
3315 continue;
3316
3317 Visited.insert(Base);
3318 }
3319 }
3320
3321 for (auto Base : Visited)
3322 Changed |= DistributeIncrements(Base);
3323
3324 return Changed;
3325}
3326
3327/// Returns an instance of the load / store optimization pass.
3329 if (PreAlloc)
3330 return new ARMPreAllocLoadStoreOptLegacy();
3331 return new ARMLoadStoreOptLegacy();
3332}
3333
3337 ARMLoadStoreOpt Impl;
3338 bool Changed = Impl.runOnMachineFunction(MF);
3339 if (!Changed)
3340 return PreservedAnalyses::all();
3343 return PA;
3344}
3345
3349 ARMPreAllocLoadStoreOpt Impl;
3350 AliasAnalysis *AA =
3352 .getManager()
3353 .getResult<AAManager>(MF.getFunction());
3355 bool Changed = Impl.runOnMachineFunction(MF, AA, DT);
3356 if (!Changed)
3357 return PreservedAnalyses::all();
3360 return PA;
3361}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasMinSize() const
bool isCortexM3() const
Align getDualLoadStoreAlignment() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
Definition Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ARM
Windows NT (Windows on ARM).
Definition MCAsmInfo.h:47
@ CE
Windows CE ARM, PowerPC, SH3, SH4.
Definition MCAsmInfo.h:48
This namespace contains all of the command line option processing machinery.
Definition CommandLine.h:52
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BBIterator iterator
Definition BasicBlock.h:87
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1630
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr RegState getDefRegState(bool B)
FunctionPass * createARMLoadStoreOptLegacyPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1910
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39