LLVM 23.0.0git
AArch64LoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions, one of which
15// has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad-hoc fix
16// to unwind information.
17//
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringRef.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCDwarf.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <cstdint>
47#include <functional>
48#include <iterator>
49#include <limits>
50#include <optional>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "aarch64-ldst-opt"
55
56STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
57STATISTIC(NumPostFolded, "Number of post-index updates folded");
58STATISTIC(NumPreFolded, "Number of pre-index updates folded");
59STATISTIC(NumUnscaledPairCreated,
60 "Number of load/store from unscaled generated");
61STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
62STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
63STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
64 "not passed the alignment check");
65STATISTIC(NumConstOffsetFolded,
66 "Number of const offset of index address folded");
67
68DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
69 "Controls which pairs are considered for renaming");
70
71// The LdStLimit limits how far we search for load/store pairs.
72static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
73 cl::init(20), cl::Hidden);
74
75// The UpdateLimit limits how far we search for update instructions when we form
76// pre-/post-index instructions.
77static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
79
80// The LdStConstLimit limits how far we search for const offset instructions
81// when we form index address load/store instructions.
82static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
83 cl::init(10), cl::Hidden);
84
85// Enable register renaming to find additional store pairing opportunities.
86static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
87 cl::init(true), cl::Hidden);
88
89#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
90
91namespace {
92
93using LdStPairFlags = struct LdStPairFlags {
94 // If a matching instruction is found, MergeForward is set to true if the
95 // merge is to remove the first instruction and replace the second with
96 // a pair-wise insn, and false if the reverse is true.
97 bool MergeForward = false;
98
99 // SExtIdx gives the index of the result of the load pair that must be
100 // extended. The value of SExtIdx assumes that the paired load produces the
101 // value in this order: (I, returned iterator), i.e., -1 means no value has
102 // to be extended, 0 means I, and 1 means the returned iterator.
103 int SExtIdx = -1;
104
105 // If not none, RenameReg can be used to rename the result register of the
106 // first store in a pair. Currently this only works when merging stores
107 // forward.
108 std::optional<MCPhysReg> RenameReg;
109
110 LdStPairFlags() = default;
111
112 void setMergeForward(bool V = true) { MergeForward = V; }
113 bool getMergeForward() const { return MergeForward; }
114
115 void setSExtIdx(int V) { SExtIdx = V; }
116 int getSExtIdx() const { return SExtIdx; }
117
118 void setRenameReg(MCPhysReg R) { RenameReg = R; }
119 void clearRenameReg() { RenameReg = std::nullopt; }
120 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
121};
122
123struct AArch64LoadStoreOpt {
125 const AArch64InstrInfo *TII;
126 const TargetRegisterInfo *TRI;
127 const AArch64Subtarget *Subtarget;
128
129 // Track which register units have been modified and used.
130 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
131 LiveRegUnits DefinedInBB;
132
133 // Scan the instructions looking for a load/store that can be combined
134 // with the current instruction into a load/store pair.
135 // Return the matching instruction if one is found, else MBB->end().
137 LdStPairFlags &Flags,
138 unsigned Limit,
139 bool FindNarrowMerge);
140
141 // Scan the instructions looking for a store that writes to the address from
142 // which the current load instruction reads. Return true if one is found.
143 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
145
146 // Merge the two instructions indicated into a wider narrow store instruction.
148 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
150 const LdStPairFlags &Flags);
151
152 // Merge the two instructions indicated into a single pair-wise instruction.
154 mergePairedInsns(MachineBasicBlock::iterator I,
156 const LdStPairFlags &Flags);
157
158 // Promote the load that reads directly from the address stored to.
160 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
162
163 // Scan the instruction list to find a base register update that can
164 // be combined with the current instruction (a load or store) using
165 // pre or post indexed addressing with writeback. Scan forwards.
167 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
168 int UnscaledOffset, unsigned Limit);
169
170 // Scan the instruction list to find a register assigned with a const
171 // value that can be combined with the current instruction (a load or store)
172 // using base addressing with writeback. Scan backwards.
174 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
175 unsigned &Offset);
176
177 // Scan the instruction list to find a base register update that can
178 // be combined with the current instruction (a load or store) using
179 // pre or post indexed addressing with writeback. Scan backwards.
180 // `MergeEither` is set to true if the combined instruction may be placed
181 // either at the location of the load/store instruction or at the location of
182 // the update instruction.
184 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
185 bool &MergeEither);
186
187 // Find an instruction that updates the base register of the ld/st
188 // instruction.
189 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
190 unsigned BaseReg, int Offset);
191
192 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
193 unsigned IndexReg, unsigned &Offset);
194
195 // Merge a pre- or post-index base register update into a ld/st instruction.
196 std::optional<MachineBasicBlock::iterator>
197 mergeUpdateInsn(MachineBasicBlock::iterator I,
198 MachineBasicBlock::iterator Update, bool IsForward,
199 bool IsPreIdx, bool MergeEither);
200
202 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
203 MachineBasicBlock::iterator Update, unsigned Offset,
204 int Scale);
205
206 // Find and merge zero store instructions.
207 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
208
209 // Find and pair ldr/str instructions.
210 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
211
212 // Find and promote load instructions which read directly from store.
213 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
214
215 // Find and merge a base register updates before or after a ld/st instruction.
216 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
217
218 // Find and merge an index ldr/st instruction into a base ld/st instruction.
219 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
220
221 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
222
223 bool runOnMachineFunction(MachineFunction &MF);
224};
225
226struct AArch64LoadStoreOptLegacy : public MachineFunctionPass {
227 static char ID;
228
229 AArch64LoadStoreOptLegacy() : MachineFunctionPass(ID) {}
230
231 bool runOnMachineFunction(MachineFunction &Fn) override;
232
233 void getAnalysisUsage(AnalysisUsage &AU) const override {
236 }
237
238 MachineFunctionProperties getRequiredProperties() const override {
239 return MachineFunctionProperties().setNoVRegs();
240 }
241
242 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
243};
244
245char AArch64LoadStoreOptLegacy::ID = 0;
246
247} // end anonymous namespace
248
249INITIALIZE_PASS(AArch64LoadStoreOptLegacy, "aarch64-ldst-opt",
250 AARCH64_LOAD_STORE_OPT_NAME, false, false)
251
252static bool isNarrowStore(unsigned Opc) {
253 switch (Opc) {
254 default:
255 return false;
256 case AArch64::STRBBui:
257 case AArch64::STURBBi:
258 case AArch64::STRHHui:
259 case AArch64::STURHHi:
260 return true;
261 }
262}
263
264// These instruction set memory tag and either keep memory contents unchanged or
265// set it to zero, ignoring the address part of the source register.
266static bool isTagStore(const MachineInstr &MI) {
267 switch (MI.getOpcode()) {
268 default:
269 return false;
270 case AArch64::STGi:
271 case AArch64::STZGi:
272 case AArch64::ST2Gi:
273 case AArch64::STZ2Gi:
274 return true;
275 }
276}
277
278static unsigned getMatchingNonSExtOpcode(unsigned Opc,
279 bool *IsValidLdStrOpc = nullptr) {
280 if (IsValidLdStrOpc)
281 *IsValidLdStrOpc = true;
282 switch (Opc) {
283 default:
284 if (IsValidLdStrOpc)
285 *IsValidLdStrOpc = false;
286 return std::numeric_limits<unsigned>::max();
287 case AArch64::STRDui:
288 case AArch64::STURDi:
289 case AArch64::STRDpre:
290 case AArch64::STRQui:
291 case AArch64::STURQi:
292 case AArch64::STRQpre:
293 case AArch64::STRBBui:
294 case AArch64::STURBBi:
295 case AArch64::STRHHui:
296 case AArch64::STURHHi:
297 case AArch64::STRWui:
298 case AArch64::STRWpre:
299 case AArch64::STURWi:
300 case AArch64::STRXui:
301 case AArch64::STRXpre:
302 case AArch64::STURXi:
303 case AArch64::STR_ZXI:
304 case AArch64::LDRDui:
305 case AArch64::LDURDi:
306 case AArch64::LDRDpre:
307 case AArch64::LDRQui:
308 case AArch64::LDURQi:
309 case AArch64::LDRQpre:
310 case AArch64::LDRWui:
311 case AArch64::LDURWi:
312 case AArch64::LDRWpre:
313 case AArch64::LDRXui:
314 case AArch64::LDURXi:
315 case AArch64::LDRXpre:
316 case AArch64::STRSui:
317 case AArch64::STURSi:
318 case AArch64::STRSpre:
319 case AArch64::LDRSui:
320 case AArch64::LDURSi:
321 case AArch64::LDRSpre:
322 case AArch64::LDR_ZXI:
323 return Opc;
324 case AArch64::LDRSWui:
325 return AArch64::LDRWui;
326 case AArch64::LDURSWi:
327 return AArch64::LDURWi;
328 case AArch64::LDRSWpre:
329 return AArch64::LDRWpre;
330 }
331}
332
333static unsigned getMatchingWideOpcode(unsigned Opc) {
334 switch (Opc) {
335 default:
336 llvm_unreachable("Opcode has no wide equivalent!");
337 case AArch64::STRBBui:
338 return AArch64::STRHHui;
339 case AArch64::STRHHui:
340 return AArch64::STRWui;
341 case AArch64::STURBBi:
342 return AArch64::STURHHi;
343 case AArch64::STURHHi:
344 return AArch64::STURWi;
345 case AArch64::STURWi:
346 return AArch64::STURXi;
347 case AArch64::STRWui:
348 return AArch64::STRXui;
349 }
350}
351
352static unsigned getMatchingPairOpcode(unsigned Opc) {
353 switch (Opc) {
354 default:
355 llvm_unreachable("Opcode has no pairwise equivalent!");
356 case AArch64::STRSui:
357 case AArch64::STURSi:
358 return AArch64::STPSi;
359 case AArch64::STRSpre:
360 return AArch64::STPSpre;
361 case AArch64::STRDui:
362 case AArch64::STURDi:
363 return AArch64::STPDi;
364 case AArch64::STRDpre:
365 return AArch64::STPDpre;
366 case AArch64::STRQui:
367 case AArch64::STURQi:
368 case AArch64::STR_ZXI:
369 return AArch64::STPQi;
370 case AArch64::STRQpre:
371 return AArch64::STPQpre;
372 case AArch64::STRWui:
373 case AArch64::STURWi:
374 return AArch64::STPWi;
375 case AArch64::STRWpre:
376 return AArch64::STPWpre;
377 case AArch64::STRXui:
378 case AArch64::STURXi:
379 return AArch64::STPXi;
380 case AArch64::STRXpre:
381 return AArch64::STPXpre;
382 case AArch64::LDRSui:
383 case AArch64::LDURSi:
384 return AArch64::LDPSi;
385 case AArch64::LDRSpre:
386 return AArch64::LDPSpre;
387 case AArch64::LDRDui:
388 case AArch64::LDURDi:
389 return AArch64::LDPDi;
390 case AArch64::LDRDpre:
391 return AArch64::LDPDpre;
392 case AArch64::LDRQui:
393 case AArch64::LDURQi:
394 case AArch64::LDR_ZXI:
395 return AArch64::LDPQi;
396 case AArch64::LDRQpre:
397 return AArch64::LDPQpre;
398 case AArch64::LDRWui:
399 case AArch64::LDURWi:
400 return AArch64::LDPWi;
401 case AArch64::LDRWpre:
402 return AArch64::LDPWpre;
403 case AArch64::LDRXui:
404 case AArch64::LDURXi:
405 return AArch64::LDPXi;
406 case AArch64::LDRXpre:
407 return AArch64::LDPXpre;
408 case AArch64::LDRSWui:
409 case AArch64::LDURSWi:
410 return AArch64::LDPSWi;
411 case AArch64::LDRSWpre:
412 return AArch64::LDPSWpre;
413 }
414}
415
418 unsigned LdOpc = LoadInst.getOpcode();
419 unsigned StOpc = StoreInst.getOpcode();
420 switch (LdOpc) {
421 default:
422 llvm_unreachable("Unsupported load instruction!");
423 case AArch64::LDRBBui:
424 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
425 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
426 case AArch64::LDURBBi:
427 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
428 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
429 case AArch64::LDRHHui:
430 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
431 StOpc == AArch64::STRXui;
432 case AArch64::LDURHHi:
433 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
434 StOpc == AArch64::STURXi;
435 case AArch64::LDRWui:
436 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
437 case AArch64::LDURWi:
438 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
439 case AArch64::LDRXui:
440 return StOpc == AArch64::STRXui;
441 case AArch64::LDURXi:
442 return StOpc == AArch64::STURXi;
443 }
444}
445
446static unsigned getPreIndexedOpcode(unsigned Opc) {
447 // FIXME: We don't currently support creating pre-indexed loads/stores when
448 // the load or store is the unscaled version. If we decide to perform such an
449 // optimization in the future the cases for the unscaled loads/stores will
450 // need to be added here.
451 switch (Opc) {
452 default:
453 llvm_unreachable("Opcode has no pre-indexed equivalent!");
454 case AArch64::STRBui:
455 return AArch64::STRBpre;
456 case AArch64::STRHui:
457 return AArch64::STRHpre;
458 case AArch64::STRSui:
459 return AArch64::STRSpre;
460 case AArch64::STRDui:
461 return AArch64::STRDpre;
462 case AArch64::STRQui:
463 return AArch64::STRQpre;
464 case AArch64::STRBBui:
465 return AArch64::STRBBpre;
466 case AArch64::STRHHui:
467 return AArch64::STRHHpre;
468 case AArch64::STRWui:
469 return AArch64::STRWpre;
470 case AArch64::STRXui:
471 return AArch64::STRXpre;
472 case AArch64::LDRBui:
473 return AArch64::LDRBpre;
474 case AArch64::LDRHui:
475 return AArch64::LDRHpre;
476 case AArch64::LDRSui:
477 return AArch64::LDRSpre;
478 case AArch64::LDRDui:
479 return AArch64::LDRDpre;
480 case AArch64::LDRQui:
481 return AArch64::LDRQpre;
482 case AArch64::LDRBBui:
483 return AArch64::LDRBBpre;
484 case AArch64::LDRHHui:
485 return AArch64::LDRHHpre;
486 case AArch64::LDRWui:
487 return AArch64::LDRWpre;
488 case AArch64::LDRXui:
489 return AArch64::LDRXpre;
490 case AArch64::LDRSWui:
491 return AArch64::LDRSWpre;
492 case AArch64::LDPSi:
493 return AArch64::LDPSpre;
494 case AArch64::LDPSWi:
495 return AArch64::LDPSWpre;
496 case AArch64::LDPDi:
497 return AArch64::LDPDpre;
498 case AArch64::LDPQi:
499 return AArch64::LDPQpre;
500 case AArch64::LDPWi:
501 return AArch64::LDPWpre;
502 case AArch64::LDPXi:
503 return AArch64::LDPXpre;
504 case AArch64::STPSi:
505 return AArch64::STPSpre;
506 case AArch64::STPDi:
507 return AArch64::STPDpre;
508 case AArch64::STPQi:
509 return AArch64::STPQpre;
510 case AArch64::STPWi:
511 return AArch64::STPWpre;
512 case AArch64::STPXi:
513 return AArch64::STPXpre;
514 case AArch64::STGi:
515 return AArch64::STGPreIndex;
516 case AArch64::STZGi:
517 return AArch64::STZGPreIndex;
518 case AArch64::ST2Gi:
519 return AArch64::ST2GPreIndex;
520 case AArch64::STZ2Gi:
521 return AArch64::STZ2GPreIndex;
522 case AArch64::STGPi:
523 return AArch64::STGPpre;
524 }
525}
526
527static unsigned getBaseAddressOpcode(unsigned Opc) {
528 // TODO: Add more index address stores.
529 switch (Opc) {
530 default:
531 llvm_unreachable("Opcode has no base address equivalent!");
532 case AArch64::LDRBroX:
533 return AArch64::LDRBui;
534 case AArch64::LDRBBroX:
535 return AArch64::LDRBBui;
536 case AArch64::LDRSBXroX:
537 return AArch64::LDRSBXui;
538 case AArch64::LDRSBWroX:
539 return AArch64::LDRSBWui;
540 case AArch64::LDRHroX:
541 return AArch64::LDRHui;
542 case AArch64::LDRHHroX:
543 return AArch64::LDRHHui;
544 case AArch64::LDRSHXroX:
545 return AArch64::LDRSHXui;
546 case AArch64::LDRSHWroX:
547 return AArch64::LDRSHWui;
548 case AArch64::LDRWroX:
549 return AArch64::LDRWui;
550 case AArch64::LDRSroX:
551 return AArch64::LDRSui;
552 case AArch64::LDRSWroX:
553 return AArch64::LDRSWui;
554 case AArch64::LDRDroX:
555 return AArch64::LDRDui;
556 case AArch64::LDRXroX:
557 return AArch64::LDRXui;
558 case AArch64::LDRQroX:
559 return AArch64::LDRQui;
560 }
561}
562
563static unsigned getPostIndexedOpcode(unsigned Opc) {
564 switch (Opc) {
565 default:
566 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
567 case AArch64::STRBui:
568 return AArch64::STRBpost;
569 case AArch64::STRHui:
570 return AArch64::STRHpost;
571 case AArch64::STRSui:
572 case AArch64::STURSi:
573 return AArch64::STRSpost;
574 case AArch64::STRDui:
575 case AArch64::STURDi:
576 return AArch64::STRDpost;
577 case AArch64::STRQui:
578 case AArch64::STURQi:
579 return AArch64::STRQpost;
580 case AArch64::STRBBui:
581 return AArch64::STRBBpost;
582 case AArch64::STRHHui:
583 return AArch64::STRHHpost;
584 case AArch64::STRWui:
585 case AArch64::STURWi:
586 return AArch64::STRWpost;
587 case AArch64::STRXui:
588 case AArch64::STURXi:
589 return AArch64::STRXpost;
590 case AArch64::LDRBui:
591 return AArch64::LDRBpost;
592 case AArch64::LDRHui:
593 return AArch64::LDRHpost;
594 case AArch64::LDRSui:
595 case AArch64::LDURSi:
596 return AArch64::LDRSpost;
597 case AArch64::LDRDui:
598 case AArch64::LDURDi:
599 return AArch64::LDRDpost;
600 case AArch64::LDRQui:
601 case AArch64::LDURQi:
602 return AArch64::LDRQpost;
603 case AArch64::LDRBBui:
604 return AArch64::LDRBBpost;
605 case AArch64::LDRHHui:
606 return AArch64::LDRHHpost;
607 case AArch64::LDRWui:
608 case AArch64::LDURWi:
609 return AArch64::LDRWpost;
610 case AArch64::LDRXui:
611 case AArch64::LDURXi:
612 return AArch64::LDRXpost;
613 case AArch64::LDRSWui:
614 return AArch64::LDRSWpost;
615 case AArch64::LDPSi:
616 return AArch64::LDPSpost;
617 case AArch64::LDPSWi:
618 return AArch64::LDPSWpost;
619 case AArch64::LDPDi:
620 return AArch64::LDPDpost;
621 case AArch64::LDPQi:
622 return AArch64::LDPQpost;
623 case AArch64::LDPWi:
624 return AArch64::LDPWpost;
625 case AArch64::LDPXi:
626 return AArch64::LDPXpost;
627 case AArch64::STPSi:
628 return AArch64::STPSpost;
629 case AArch64::STPDi:
630 return AArch64::STPDpost;
631 case AArch64::STPQi:
632 return AArch64::STPQpost;
633 case AArch64::STPWi:
634 return AArch64::STPWpost;
635 case AArch64::STPXi:
636 return AArch64::STPXpost;
637 case AArch64::STGi:
638 return AArch64::STGPostIndex;
639 case AArch64::STZGi:
640 return AArch64::STZGPostIndex;
641 case AArch64::ST2Gi:
642 return AArch64::ST2GPostIndex;
643 case AArch64::STZ2Gi:
644 return AArch64::STZ2GPostIndex;
645 case AArch64::STGPi:
646 return AArch64::STGPpost;
647 }
648}
649
651
652 unsigned OpcA = FirstMI.getOpcode();
653 unsigned OpcB = MI.getOpcode();
654
655 switch (OpcA) {
656 default:
657 return false;
658 case AArch64::STRSpre:
659 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
660 case AArch64::STRDpre:
661 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
662 case AArch64::STRQpre:
663 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
664 case AArch64::STRWpre:
665 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
666 case AArch64::STRXpre:
667 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
668 case AArch64::LDRSpre:
669 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
670 case AArch64::LDRDpre:
671 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
672 case AArch64::LDRQpre:
673 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
674 case AArch64::LDRWpre:
675 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
676 case AArch64::LDRXpre:
677 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
678 case AArch64::LDRSWpre:
679 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
680 }
681}
682
683// Returns the scale and offset range of pre/post indexed variants of MI.
684static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
685 int &MinOffset, int &MaxOffset) {
686 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
687 bool IsTagStore = isTagStore(MI);
688 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
689 // as in the "unsigned offset" variant.
690 // All other pre/post indexed ldst instructions are unscaled.
691 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
692
693 if (IsPaired) {
694 MinOffset = -64;
695 MaxOffset = 63;
696 } else {
697 MinOffset = -256;
698 MaxOffset = 255;
699 }
700}
701
703 unsigned PairedRegOp = 0) {
704 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
705 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
706 if (IsPreLdSt)
707 PairedRegOp += 1;
708 unsigned Idx =
709 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
710 return MI.getOperand(Idx);
711}
712
715 const AArch64InstrInfo *TII) {
716 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
717 int LoadSize = TII->getMemScale(LoadInst);
718 int StoreSize = TII->getMemScale(StoreInst);
719 int UnscaledStOffset =
720 TII->hasUnscaledLdStOffset(StoreInst)
723 int UnscaledLdOffset =
724 TII->hasUnscaledLdStOffset(LoadInst)
727 return (UnscaledStOffset <= UnscaledLdOffset) &&
728 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
729}
730
732 unsigned Opc = MI.getOpcode();
733 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
734 isNarrowStore(Opc)) &&
735 getLdStRegOp(MI).getReg() == AArch64::WZR;
736}
737
739 switch (MI.getOpcode()) {
740 default:
741 return false;
742 // Scaled instructions.
743 case AArch64::LDRBBui:
744 case AArch64::LDRHHui:
745 case AArch64::LDRWui:
746 case AArch64::LDRXui:
747 // Unscaled instructions.
748 case AArch64::LDURBBi:
749 case AArch64::LDURHHi:
750 case AArch64::LDURWi:
751 case AArch64::LDURXi:
752 return true;
753 }
754}
755
757 unsigned Opc = MI.getOpcode();
758 switch (Opc) {
759 default:
760 return false;
761 // Scaled instructions.
762 case AArch64::STRBui:
763 case AArch64::STRHui:
764 case AArch64::STRSui:
765 case AArch64::STRDui:
766 case AArch64::STRQui:
767 case AArch64::STRXui:
768 case AArch64::STRWui:
769 case AArch64::STRHHui:
770 case AArch64::STRBBui:
771 case AArch64::LDRBui:
772 case AArch64::LDRHui:
773 case AArch64::LDRSui:
774 case AArch64::LDRDui:
775 case AArch64::LDRQui:
776 case AArch64::LDRXui:
777 case AArch64::LDRWui:
778 case AArch64::LDRHHui:
779 case AArch64::LDRBBui:
780 case AArch64::STGi:
781 case AArch64::STZGi:
782 case AArch64::ST2Gi:
783 case AArch64::STZ2Gi:
784 case AArch64::STGPi:
785 // Unscaled instructions.
786 case AArch64::STURSi:
787 case AArch64::STURDi:
788 case AArch64::STURQi:
789 case AArch64::STURWi:
790 case AArch64::STURXi:
791 case AArch64::LDURSi:
792 case AArch64::LDURDi:
793 case AArch64::LDURQi:
794 case AArch64::LDURWi:
795 case AArch64::LDURXi:
796 // Paired instructions.
797 case AArch64::LDPSi:
798 case AArch64::LDPSWi:
799 case AArch64::LDPDi:
800 case AArch64::LDPQi:
801 case AArch64::LDPWi:
802 case AArch64::LDPXi:
803 case AArch64::STPSi:
804 case AArch64::STPDi:
805 case AArch64::STPQi:
806 case AArch64::STPWi:
807 case AArch64::STPXi:
808 // Make sure this is a reg+imm (as opposed to an address reloc).
810 return false;
811
812 // When using stack tagging, simple sp+imm loads and stores are not
813 // tag-checked, but pre- and post-indexed versions of them are, so we can't
814 // replace the former with the latter. This transformation would be valid
815 // if the load/store accesses an untagged stack slot, but we don't have
816 // that information available after frame indices have been eliminated.
817 if (AFI.isMTETagged() &&
818 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
819 return false;
820
821 return true;
822 }
823}
824
825// Make sure this is a reg+reg Ld/St
826static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
827 unsigned Opc = MI.getOpcode();
828 switch (Opc) {
829 default:
830 return false;
831 // Scaled instructions.
832 // TODO: Add more index address stores.
833 case AArch64::LDRBroX:
834 case AArch64::LDRBBroX:
835 case AArch64::LDRSBXroX:
836 case AArch64::LDRSBWroX:
837 Scale = 1;
838 return true;
839 case AArch64::LDRHroX:
840 case AArch64::LDRHHroX:
841 case AArch64::LDRSHXroX:
842 case AArch64::LDRSHWroX:
843 Scale = 2;
844 return true;
845 case AArch64::LDRWroX:
846 case AArch64::LDRSroX:
847 case AArch64::LDRSWroX:
848 Scale = 4;
849 return true;
850 case AArch64::LDRDroX:
851 case AArch64::LDRXroX:
852 Scale = 8;
853 return true;
854 case AArch64::LDRQroX:
855 Scale = 16;
856 return true;
857 }
858}
859
861 switch (MO.getParent()->getOpcode()) {
862 default:
863 return MO.isRenamable();
864 case AArch64::ORRWrs:
865 case AArch64::ADDWri:
866 return true;
867 }
868}
869
871AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
873 const LdStPairFlags &Flags) {
875 "Expected promotable zero stores.");
876
877 MachineBasicBlock::iterator E = I->getParent()->end();
879 // If NextI is the second of the two instructions to be merged, we need
880 // to skip one further. Either way we merge will invalidate the iterator,
881 // and we don't need to scan the new instruction, as it's a pairwise
882 // instruction, which we're not considering for further action anyway.
883 if (NextI == MergeMI)
884 NextI = next_nodbg(NextI, E);
885
886 unsigned Opc = I->getOpcode();
887 unsigned MergeMIOpc = MergeMI->getOpcode();
888 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
889 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
890 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
891 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
892
893 bool MergeForward = Flags.getMergeForward();
894 // Insert our new paired instruction after whichever of the paired
895 // instructions MergeForward indicates.
896 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
897 // Also based on MergeForward is from where we copy the base register operand
898 // so we get the flags compatible with the input code.
899 const MachineOperand &BaseRegOp =
900 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
901 : AArch64InstrInfo::getLdStBaseOp(*I);
902
903 // Which register is Rt and which is Rt2 depends on the offset order.
904 int64_t IOffsetInBytes =
905 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
906 int64_t MIOffsetInBytes =
908 MergeMIOffsetStride;
909 // Select final offset based on the offset order.
910 int64_t OffsetImm;
911 if (IOffsetInBytes > MIOffsetInBytes)
912 OffsetImm = MIOffsetInBytes;
913 else
914 OffsetImm = IOffsetInBytes;
915
916 int NewOpcode = getMatchingWideOpcode(Opc);
917 // Adjust final offset on scaled stores because the new instruction
918 // has a different scale.
919 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
920 int NewOffsetStride = TII->getMemScale(NewOpcode);
921 assert(((OffsetImm % NewOffsetStride) == 0) &&
922 "Offset should be a multiple of the store memory scale");
923 OffsetImm = OffsetImm / NewOffsetStride;
924 }
925
926 // Construct the new instruction.
927 DebugLoc DL = I->getDebugLoc();
928 MachineBasicBlock *MBB = I->getParent();
929 MachineInstrBuilder MIB;
930 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
931 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
932 .add(BaseRegOp)
933 .addImm(OffsetImm)
934 .cloneMergedMemRefs({&*I, &*MergeMI})
935 .setMIFlags(I->mergeFlagsWith(*MergeMI));
936 (void)MIB;
937
938 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
939 LLVM_DEBUG(I->print(dbgs()));
940 LLVM_DEBUG(dbgs() << " ");
941 LLVM_DEBUG(MergeMI->print(dbgs()));
942 LLVM_DEBUG(dbgs() << " with instruction:\n ");
943 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
944 LLVM_DEBUG(dbgs() << "\n");
945
946 // Erase the old instructions.
947 I->eraseFromParent();
948 MergeMI->eraseFromParent();
949 return NextI;
950}
951
952// Apply Fn to all instructions between MI and the beginning of the block, until
953// a def for DefReg is reached. Returns true, iff Fn returns true for all
954// visited instructions. Stop after visiting Limit iterations.
956 const TargetRegisterInfo *TRI, unsigned Limit,
957 std::function<bool(MachineInstr &, bool)> &Fn) {
958 auto MBB = MI.getParent();
959 for (MachineInstr &I :
960 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
961 if (!Limit)
962 return false;
963 --Limit;
964
965 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
966 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
967 TRI->regsOverlap(MOP.getReg(), DefReg);
968 });
969 if (!Fn(I, isDef))
970 return false;
971 if (isDef)
972 break;
973 }
974 return true;
975}
976
978 const TargetRegisterInfo *TRI) {
979
980 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
981 if (MOP.isReg() && MOP.isKill())
982 Units.removeReg(MOP.getReg());
983
984 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
985 if (MOP.isReg() && !MOP.isKill())
986 Units.addReg(MOP.getReg());
987}
988
989/// This function will add a new entry into the debugValueSubstitutions table
990/// when two instruction have been merged into a new one represented by \p
991/// MergedInstr.
993 unsigned InstrNumToSet,
994 MachineInstr &OriginalInstr,
995 MachineInstr &MergedInstr) {
996
997 // Figure out the Operand Index of the destination register of the
998 // OriginalInstr in the new MergedInstr.
999 auto Reg = OriginalInstr.getOperand(0).getReg();
1000 unsigned OperandNo = 0;
1001 bool RegFound = false;
1002 for (const auto Op : MergedInstr.operands()) {
1003 if (Op.getReg() == Reg) {
1004 RegFound = true;
1005 break;
1006 }
1007 OperandNo++;
1008 }
1009
1010 if (RegFound)
1011 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
1012 {InstrNumToSet, OperandNo});
1013}
1014
1016AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
1018 const LdStPairFlags &Flags) {
1019 MachineBasicBlock::iterator E = I->getParent()->end();
1021 // If NextI is the second of the two instructions to be merged, we need
1022 // to skip one further. Either way we merge will invalidate the iterator,
1023 // and we don't need to scan the new instruction, as it's a pairwise
1024 // instruction, which we're not considering for further action anyway.
1025 if (NextI == Paired)
1026 NextI = next_nodbg(NextI, E);
1027
1028 int SExtIdx = Flags.getSExtIdx();
1029 unsigned Opc =
1030 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1031 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1032 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1033
1034 bool MergeForward = Flags.getMergeForward();
1035
1036 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1037 if (RenameReg) {
1038 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1039 DefinedInBB.addReg(*RenameReg);
1040
1041 // Return the sub/super register for RenameReg, matching the size of
1042 // OriginalReg.
1043 auto GetMatchingSubReg =
1044 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1045 for (MCPhysReg SubOrSuper :
1046 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1047 if (C->contains(SubOrSuper))
1048 return SubOrSuper;
1049 }
1050 llvm_unreachable("Should have found matching sub or super register!");
1051 };
1052
1053 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1054 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1055 bool IsDef) {
1056 if (IsDef) {
1057 bool SeenDef = false;
1058 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1059 MachineOperand &MOP = MI.getOperand(OpIdx);
1060 // Rename the first explicit definition and all implicit
1061 // definitions matching RegToRename.
1062 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1063 (!MergeForward || !SeenDef ||
1064 (MOP.isDef() && MOP.isImplicit())) &&
1065 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1066 assert((MOP.isImplicit() ||
1067 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1068 "Need renamable operands");
1069 Register MatchingReg;
1070 if (const TargetRegisterClass *RC =
1071 MI.getRegClassConstraint(OpIdx, TII, TRI))
1072 MatchingReg = GetMatchingSubReg(RC);
1073 else {
1074 if (!isRewritableImplicitDef(MOP))
1075 continue;
1076 MatchingReg = GetMatchingSubReg(
1077 TRI->getMinimalPhysRegClass(MOP.getReg()));
1078 }
1079 MOP.setReg(MatchingReg);
1080 SeenDef = true;
1081 }
1082 }
1083 } else {
1084 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1085 MachineOperand &MOP = MI.getOperand(OpIdx);
1086 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1087 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1088 assert((MOP.isImplicit() ||
1089 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1090 "Need renamable operands");
1091 Register MatchingReg;
1092 if (const TargetRegisterClass *RC =
1093 MI.getRegClassConstraint(OpIdx, TII, TRI))
1094 MatchingReg = GetMatchingSubReg(RC);
1095 else
1096 MatchingReg = GetMatchingSubReg(
1097 TRI->getMinimalPhysRegClass(MOP.getReg()));
1098 assert(MatchingReg != AArch64::NoRegister &&
1099 "Cannot find matching regs for renaming");
1100 MOP.setReg(MatchingReg);
1101 }
1102 }
1103 }
1104 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1105 return true;
1106 };
1107 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1108 TRI, UINT32_MAX, UpdateMIs);
1109
1110#if !defined(NDEBUG)
1111 // For forward merging store:
1112 // Make sure the register used for renaming is not used between the
1113 // paired instructions. That would trash the content before the new
1114 // paired instruction.
1115 MCPhysReg RegToCheck = *RenameReg;
1116 // For backward merging load:
1117 // Make sure the register being renamed is not used between the
1118 // paired instructions. That would trash the content after the new
1119 // paired instruction.
1120 if (!MergeForward)
1121 RegToCheck = RegToRename;
1122 for (auto &MI :
1123 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1124 MergeForward ? std::next(I) : I,
1125 MergeForward ? std::next(Paired) : Paired))
1126 assert(all_of(MI.operands(),
1127 [this, RegToCheck](const MachineOperand &MOP) {
1128 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1129 MOP.isUndef() ||
1130 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1131 }) &&
1132 "Rename register used between paired instruction, trashing the "
1133 "content");
1134#endif
1135 }
1136
1137 // Insert our new paired instruction after whichever of the paired
1138 // instructions MergeForward indicates.
1139 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1140 // Also based on MergeForward is from where we copy the base register operand
1141 // so we get the flags compatible with the input code.
1142 const MachineOperand &BaseRegOp =
1143 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1144 : AArch64InstrInfo::getLdStBaseOp(*I);
1145
1147 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1148 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1149 if (IsUnscaled != PairedIsUnscaled) {
1150 // We're trying to pair instructions that differ in how they are scaled. If
1151 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1152 // the opposite (i.e., make Paired's offset unscaled).
1153 int MemSize = TII->getMemScale(*Paired);
1154 if (PairedIsUnscaled) {
1155 // If the unscaled offset isn't a multiple of the MemSize, we can't
1156 // pair the operations together.
1157 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1158 "Offset should be a multiple of the stride!");
1159 PairedOffset /= MemSize;
1160 } else {
1161 PairedOffset *= MemSize;
1162 }
1163 }
1164
1165 // Which register is Rt and which is Rt2 depends on the offset order.
1166 // However, for pre load/stores the Rt should be the one of the pre
1167 // load/store.
1168 MachineInstr *RtMI, *Rt2MI;
1169 if (Offset == PairedOffset + OffsetStride &&
1171 RtMI = &*Paired;
1172 Rt2MI = &*I;
1173 // Here we swapped the assumption made for SExtIdx.
1174 // I.e., we turn ldp I, Paired into ldp Paired, I.
1175 // Update the index accordingly.
1176 if (SExtIdx != -1)
1177 SExtIdx = (SExtIdx + 1) % 2;
1178 } else {
1179 RtMI = &*I;
1180 Rt2MI = &*Paired;
1181 }
1182 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1183 // Scale the immediate offset, if necessary.
1184 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1185 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1186 "Unscaled offset cannot be scaled.");
1187 OffsetImm /= TII->getMemScale(*RtMI);
1188 }
1189
1190 // Construct the new instruction.
1191 MachineInstrBuilder MIB;
1192 DebugLoc DL = I->getDebugLoc();
1193 MachineBasicBlock *MBB = I->getParent();
1194 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1195 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1196 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1197 // Kill flags may become invalid when moving stores for pairing.
1198 if (RegOp0.isUse()) {
1199 if (!MergeForward) {
1200 // Clear kill flags on store if moving upwards. Example:
1201 // STRWui kill %w0, ...
1202 // USE %w1
1203 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1204 // We are about to move the store of w1, so its kill flag may become
1205 // invalid; not the case for w0.
1206 // Since w1 is used between the stores, the kill flag on w1 is cleared
1207 // after merging.
1208 // STPWi kill %w0, %w1, ...
1209 // USE %w1
1210 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1211 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1212 PairedRegOp.setIsKill(false);
1213 } else {
1214 // Clear kill flags of the first stores register. Example:
1215 // STRWui %w1, ...
1216 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1217 // STRW %w0
1219 for (MachineInstr &MI :
1220 make_range(std::next(I->getIterator()), Paired->getIterator()))
1221 MI.clearRegisterKills(Reg, TRI);
1222 }
1223 }
1224
1225 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1226 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1227
1228 // Adds the pre-index operand for pre-indexed ld/st pairs.
1229 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1230 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1231
1232 MIB.add(RegOp0)
1233 .add(RegOp1)
1234 .add(BaseRegOp)
1235 .addImm(OffsetImm)
1236 .cloneMergedMemRefs({&*I, &*Paired})
1237 .setMIFlags(I->mergeFlagsWith(*Paired));
1238
1239 (void)MIB;
1240
1241 LLVM_DEBUG(
1242 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1243 LLVM_DEBUG(I->print(dbgs()));
1244 LLVM_DEBUG(dbgs() << " ");
1245 LLVM_DEBUG(Paired->print(dbgs()));
1246 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1247 if (SExtIdx != -1) {
1248 // Generate the sign extension for the proper result of the ldp.
1249 // I.e., with X1, that would be:
1250 // %w1 = KILL %w1, implicit-def %x1
1251 // %x1 = SBFMXri killed %x1, 0, 31
1252 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1253 // Right now, DstMO has the extended register, since it comes from an
1254 // extended opcode.
1255 Register DstRegX = DstMO.getReg();
1256 // Get the W variant of that register.
1257 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1258 // Update the result of LDP to use the W instead of the X variant.
1259 DstMO.setReg(DstRegW);
1260 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1261 LLVM_DEBUG(dbgs() << "\n");
1262 // Make the machine verifier happy by providing a definition for
1263 // the X register.
1264 // Insert this definition right after the generated LDP, i.e., before
1265 // InsertionPoint.
1266 MachineInstrBuilder MIBKill =
1267 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1268 .addReg(DstRegW)
1269 .addReg(DstRegX, RegState::Define);
1270 MIBKill->getOperand(2).setImplicit();
1271 // Create the sign extension.
1272 MachineInstrBuilder MIBSXTW =
1273 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1274 .addReg(DstRegX)
1275 .addImm(0)
1276 .addImm(31);
1277 (void)MIBSXTW;
1278
1279 // In the case of a sign-extend, where we have something like:
1280 // debugValueSubstitutions:[]
1281 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1282 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1283 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1284 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1285
1286 // It will be converted to:
1287 // debugValueSubstitutions:[]
1288 // $w0, $w1 = LDPWi $x0, 0
1289 // $w0 = KILL $w0, implicit-def $x0
1290 // $x0 = SBFMXri $x0, 0, 31
1291 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1292 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1293
1294 // We want the final result to look like:
1295 // debugValueSubstitutions:
1296 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1297 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1298 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1299 // $w0 = KILL $w0, implicit-def $x0
1300 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1301 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1302 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1303
1304 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1305 // instruction contains the final value we care about we give it a new
1306 // debug-instr-number 3. Whereas, $w1 contains the final value that we care
1307 // about, therefore the LDP instruction is also given a new
1308 // debug-instr-number 4. We have to add these substitutions to the
1309 // debugValueSubstitutions table. However, we also have to ensure that the
1310 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1311 // $w1 is the second operand of the LDP instruction.
1312
1313 if (I->peekDebugInstrNum()) {
1314 // If I is the instruction which got sign extended and has a
1315 // debug-instr-number, give the SBFMXri instruction a new
1316 // debug-instr-number, and update the debugValueSubstitutions table with
1317 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1318 // instruction a new debug-instr-number, and update the
1319 // debugValueSubstitutions table with the new debug-instr-number and
1320 // OpIndex pair.
1321 unsigned NewInstrNum;
1322 if (DstRegX == I->getOperand(0).getReg()) {
1323 NewInstrNum = MIBSXTW->getDebugInstrNum();
1324 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1325 *MIBSXTW);
1326 } else {
1327 NewInstrNum = MIB->getDebugInstrNum();
1328 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1329 }
1330 }
1331 if (Paired->peekDebugInstrNum()) {
1332 // If Paired is the instruction which got sign extended and has a
1333 // debug-instr-number, give the SBFMXri instruction a new
1334 // debug-instr-number, and update the debugValueSubstitutions table with
1335 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1336 // instruction a new debug-instr-number, and update the
1337 // debugValueSubstitutions table with the new debug-instr-number and
1338 // OpIndex pair.
1339 unsigned NewInstrNum;
1340 if (DstRegX == Paired->getOperand(0).getReg()) {
1341 NewInstrNum = MIBSXTW->getDebugInstrNum();
1342 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1343 *MIBSXTW);
1344 } else {
1345 NewInstrNum = MIB->getDebugInstrNum();
1346 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1347 *MIB);
1348 }
1349 }
1350
1351 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1352 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1353 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1354 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1355 // variant of the registers.
1356 MachineOperand &MOp0 = MIB->getOperand(0);
1357 MachineOperand &MOp1 = MIB->getOperand(1);
1358 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1359 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1360 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1361 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1362 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1363 } else {
1364
1365 // In the case that the merge doesn't result in a sign-extend, if we have
1366 // something like:
1367 // debugValueSubstitutions:[]
1368 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1369 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1370 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1371 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1372
1373 // It will be converted to:
1374 // debugValueSubstitutions: []
1375 // $x0, $x1 = LDPXi $x0, 0
1376 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1377 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1378
1379 // We want the final result to look like:
1380 // debugValueSubstitutions:
1381 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1382 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1383 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1384 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1385 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1386
1387 // Here all that needs to be done is, that the LDP instruction needs to be
1388 // updated with a new debug-instr-number, we then need to add entries into
1389 // the debugSubstitutions table to map the old instr-refs to the new ones.
1390
1391 // Assign new DebugInstrNum to the Paired instruction.
1392 if (I->peekDebugInstrNum()) {
1393 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1394 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1395 *MIB);
1396 }
1397 if (Paired->peekDebugInstrNum()) {
1398 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1399 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1400 *MIB);
1401 }
1402
1403 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1404 }
1405 LLVM_DEBUG(dbgs() << "\n");
1406
1407 if (MergeForward)
1408 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1409 if (MOP.isReg() && MOP.isKill())
1410 DefinedInBB.addReg(MOP.getReg());
1411
1412 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1413 // only copies implicit defs and makes sure that each operand is only added
1414 // once in case of duplicates.
1415 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1417 SmallSetVector<Register, 4> Ops;
1418 for (const MachineOperand &MO :
1419 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1420 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1421 Ops.insert(MO.getReg());
1422 for (const MachineOperand &MO :
1423 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1424 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1425 Ops.insert(MO.getReg());
1426 for (auto Op : Ops)
1427 MIB.addDef(Op, RegState::Implicit);
1428 };
1429 CopyImplicitOps(I, Paired);
1430
1431 // Erase the old instructions.
1432 I->eraseFromParent();
1433 Paired->eraseFromParent();
1434
1435 return NextI;
1436}
1437
1439AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1442 next_nodbg(LoadI, LoadI->getParent()->end());
1443
1444 int LoadSize = TII->getMemScale(*LoadI);
1445 int StoreSize = TII->getMemScale(*StoreI);
1446 Register LdRt = getLdStRegOp(*LoadI).getReg();
1447 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1448 Register StRt = getLdStRegOp(*StoreI).getReg();
1449 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1450
1451 assert((IsStoreXReg ||
1452 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1453 "Unexpected RegClass");
1454
1455 MachineInstr *BitExtMI;
1456 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1457 // Remove the load, if the destination register of the loads is the same
1458 // register for stored value.
1459 if (StRt == LdRt && LoadSize == 8) {
1460 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1461 LoadI->getIterator())) {
1462 if (MI.killsRegister(StRt, TRI)) {
1463 MI.clearRegisterKills(StRt, TRI);
1464 break;
1465 }
1466 }
1467 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1468 LLVM_DEBUG(LoadI->print(dbgs()));
1469 LLVM_DEBUG(dbgs() << "\n");
1470 LoadI->eraseFromParent();
1471 return NextI;
1472 }
1473 // Replace the load with a mov if the load and store are in the same size.
1474 BitExtMI =
1475 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1476 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1477 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1478 .add(StMO)
1480 .setMIFlags(LoadI->getFlags());
1481 } else {
1482 // FIXME: Currently we disable this transformation in big-endian targets as
1483 // performance and correctness are verified only in little-endian.
1484 if (!Subtarget->isLittleEndian())
1485 return NextI;
1486 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1487 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1488 "Unsupported ld/st match");
1489 assert(LoadSize <= StoreSize && "Invalid load size");
1490 int UnscaledLdOffset =
1491 IsUnscaled
1493 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1494 int UnscaledStOffset =
1495 IsUnscaled
1497 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1498 int Width = LoadSize * 8;
1499 Register DestReg =
1500 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1501 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1502 : LdRt;
1503
1504 assert((UnscaledLdOffset >= UnscaledStOffset &&
1505 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1506 "Invalid offset");
1507
1508 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1509 int Imms = Immr + Width - 1;
1510 if (UnscaledLdOffset == UnscaledStOffset) {
1511 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1512 | ((Immr) << 6) // immr
1513 | ((Imms) << 0) // imms
1514 ;
1515
1516 BitExtMI =
1517 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1518 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1519 DestReg)
1520 .add(StMO)
1521 .addImm(AndMaskEncoded)
1522 .setMIFlags(LoadI->getFlags());
1523 } else if (IsStoreXReg && Imms == 31) {
1524 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1525 // instruction.
1526 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1527 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1528 TII->get(AArch64::UBFMWri),
1529 TRI->getSubReg(DestReg, AArch64::sub_32))
1530 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1531 .addImm(Immr)
1532 .addImm(Imms)
1533 .setMIFlags(LoadI->getFlags());
1534 } else {
1535 BitExtMI =
1536 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1537 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1538 DestReg)
1539 .add(StMO)
1540 .addImm(Immr)
1541 .addImm(Imms)
1542 .setMIFlags(LoadI->getFlags());
1543 }
1544 }
1545
1546 // Clear kill flags between store and load.
1547 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1548 BitExtMI->getIterator()))
1549 if (MI.killsRegister(StRt, TRI)) {
1550 MI.clearRegisterKills(StRt, TRI);
1551 break;
1552 }
1553
1554 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1555 LLVM_DEBUG(StoreI->print(dbgs()));
1556 LLVM_DEBUG(dbgs() << " ");
1557 LLVM_DEBUG(LoadI->print(dbgs()));
1558 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1559 LLVM_DEBUG(StoreI->print(dbgs()));
1560 LLVM_DEBUG(dbgs() << " ");
1561 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1562 LLVM_DEBUG(dbgs() << "\n");
1563
1564 // Erase the old instructions.
1565 LoadI->eraseFromParent();
1566 return NextI;
1567}
1568
// Check whether a paired load/store can encode this offset: scaled pair
// instructions take a signed 7-bit element offset, i.e. [-64, 63].
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  if (IsUnscaled) {
    // Unscaled instructions carry byte offsets. A byte offset that is not a
    // multiple of the element size cannot be expressed as an element offset
    // at all, so there is no point trying to match it.
    if (Offset % OffsetStride != 0)
      return false;
    // Convert the byte offset into the "element" offset used by the scaled
    // pair load/store instructions.
    Offset /= OffsetStride;
  }
  return -64 <= Offset && Offset <= 63;
}
1581
// Round Num up to the next multiple of PowOf2. Specialized to powers of 2
// and to signed ints, avoiding having to do a C-style cast from uint64_t to
// int when using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  const int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
1589
1590static bool mayAlias(MachineInstr &MIa,
                     AliasAnalysis *AA) {
  // Return true if MIa may alias any instruction in MemInsns. TBAA is not
  // used for disambiguation here (/*UseTBAA*/ false), making the query more
  // conservative.
  for (MachineInstr *MIb : MemInsns) {
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
      LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
      return true;
    }
  }

  LLVM_DEBUG(dbgs() << "No aliases found\n");
  return false;
}
1603
1604bool AArch64LoadStoreOpt::findMatchingStore(
1605 MachineBasicBlock::iterator I, unsigned Limit,
1607 MachineBasicBlock::iterator B = I->getParent()->begin();
1609 MachineInstr &LoadMI = *I;
1611
1612 // If the load is the first instruction in the block, there's obviously
1613 // not any matching store.
1614 if (MBBI == B)
1615 return false;
1616
1617 // Track which register units have been modified and used between the first
1618 // insn and the second insn.
1619 ModifiedRegUnits.clear();
1620 UsedRegUnits.clear();
1621
1622 unsigned Count = 0;
1623 do {
1624 MBBI = prev_nodbg(MBBI, B);
1625 MachineInstr &MI = *MBBI;
1626
1627 // Don't count transient instructions towards the search limit since there
1628 // may be different numbers of them if e.g. debug information is present.
1629 if (!MI.isTransient())
1630 ++Count;
1631
1632 // If the load instruction reads directly from the address to which the
1633 // store instruction writes and the stored value is not modified, we can
1634 // promote the load. Since we do not handle stores with pre-/post-index,
1635 // it's unnecessary to check if BaseReg is modified by the store itself.
1636 // Also we can't handle stores without an immediate offset operand,
1637 // while the operand might be the address for a global variable.
1638 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1641 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1642 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1643 StoreI = MBBI;
1644 return true;
1645 }
1646
1647 if (MI.isCall())
1648 return false;
1649
1650 // Update modified / uses register units.
1651 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1652
1653 // Otherwise, if the base register is modified, we have no match, so
1654 // return early.
1655 if (!ModifiedRegUnits.available(BaseReg))
1656 return false;
1657
1658 // If we encounter a store aliased with the load, return early.
1659 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1660 return false;
1661 } while (MBBI != B && Count < Limit);
1662 return false;
1663}
1664
1665static bool needsWinCFI(const MachineFunction *MF) {
1666 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1668}
1669
1670// Returns true if FirstMI and MI are candidates for merging or pairing.
1671// Otherwise, returns false.
1673 LdStPairFlags &Flags,
1674 const AArch64InstrInfo *TII) {
1675 // If this is volatile or if pairing is suppressed, not a candidate.
1676 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1677 return false;
1678
1679 // We should have already checked FirstMI for pair suppression and volatility.
1680 assert(!FirstMI.hasOrderedMemoryRef() &&
1681 !TII->isLdStPairSuppressed(FirstMI) &&
1682 "FirstMI shouldn't get here if either of these checks are true.");
1683
1684 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1686 return false;
1687
1688 unsigned OpcA = FirstMI.getOpcode();
1689 unsigned OpcB = MI.getOpcode();
1690
1691 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1692 if (OpcA == OpcB)
1693 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1694
1695 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1696 // allow pairing them with other instructions.
1697 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1698 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1699 return false;
1700
1701 // Two pre ld/st of different opcodes cannot be merged either
1703 return false;
1704
1705 // Try to match a sign-extended load/store with a zero-extended load/store.
1706 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1707 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1708 assert(IsValidLdStrOpc &&
1709 "Given Opc should be a Load or Store with an immediate");
1710 // OpcA will be the first instruction in the pair.
1711 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1712 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1713 return true;
1714 }
1715
1716 // If the second instruction isn't even a mergable/pairable load/store, bail
1717 // out.
1718 if (!PairIsValidLdStrOpc)
1719 return false;
1720
1721 // Narrow stores do not have a matching pair opcodes, so constrain their
1722 // merging to zero stores.
1723 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1724 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1725 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1726 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1727
1728 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1729 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1730 // are candidate pairs that can be merged.
1731 if (isPreLdStPairCandidate(FirstMI, MI))
1732 return true;
1733
1734 // Try to match an unscaled load/store with a scaled load/store.
1735 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1737
1738 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1739}
1740
1741static bool canRenameMOP(const MachineOperand &MOP,
1742 const TargetRegisterInfo *TRI) {
1743 if (MOP.isReg()) {
1744 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1745 // Renaming registers with multiple disjunct sub-registers (e.g. the
1746 // result of a LD3) means that all sub-registers are renamed, potentially
1747 // impacting other instructions we did not check. Bail out.
1748 // Note that this relies on the structure of the AArch64 register file. In
1749 // particular, a subregister cannot be written without overwriting the
1750 // whole register.
1751 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1752 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1753 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1754 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1755 LLVM_DEBUG(
1756 dbgs()
1757 << " Cannot rename operands with multiple disjunct subregisters ("
1758 << MOP << ")\n");
1759 return false;
1760 }
1761
1762 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1763 // them must be known. For example, in ORRWrs the implicit-def
1764 // corresponds to the result register.
1765 if (MOP.isImplicit() && MOP.isDef()) {
1766 if (!isRewritableImplicitDef(MOP))
1767 return false;
1768 return TRI->isSuperOrSubRegisterEq(
1769 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1770 }
1771 }
1772 return MOP.isImplicit() ||
1773 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1774}
1775
1776static bool
                        const TargetRegisterInfo *TRI) {
  // Renaming is only attempted for stores: RegToRename below is the register
  // holding the stored value, and it must be killed at FirstMI.
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  // Either the store operand itself is a kill, or an implicit operand
  // overlapping RegToRename is.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
    return false;
  }

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI);
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
                        << "currently\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      // 1. Insert an extra copy, to materialize the def.
      // 2. Skip pseudo-defs until we find an non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
        return false;
      }

      // Only def operands overlapping RegToRename are relevant here.
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      // Non-def instruction: check every operand (uses included) that
      // overlaps RegToRename.
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  // A def for RegToRename must exist in this block; otherwise we cannot
  // prove the rename is contained.
  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
1871
1872// We want to merge the second load into the first by rewriting the usages of
1873// the same reg between first (incl.) and second (excl.). We don't need to care
1874// about any insns before FirstLoad or after SecondLoad.
1875// 1. The second load writes new value into the same reg.
1876// - The renaming is impossible to impact later use of the reg.
1877// - The second load always trash the value written by the first load which
1878// means the reg must be killed before the second load.
1879// 2. The first load must be a def for the same reg so we don't need to look
1880// into anything before it.
// NOTE(review): the opening signature line (presumably
// `static bool canRenameUntilSecondLoad(`) is not visible in this listing
// fragment — confirm against the full source.
1882 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1883 LiveRegUnits &UsedInBetween,
1885 const TargetRegisterInfo *TRI) {
  // Pseudo instructions (e.g. KILL) may not produce real code, so there
  // would be no concrete def at FirstLoad for the rename register.
1886 if (FirstLoad.isPseudo())
1887 return false;
1888
  // Fold FirstLoad's own operands into the used-in-between accumulator.
1889 UsedInBetween.accumulate(FirstLoad);
1890 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  // Walk [FirstLoad, SecondLoad) and verify every operand overlapping
  // RegToRename can be renamed; collect the register classes each such
  // operand requires so a suitable rename register can be chosen later.
1891 bool Success = std::all_of(
1892 FirstLoad.getIterator(), SecondLoad.getIterator(),
1893 [&](MachineInstr &MI) {
1894 LLVM_DEBUG(dbgs() << "Checking " << MI);
1895 // Currently we do not try to rename across frame-setup instructions.
1896 if (MI.getFlag(MachineInstr::FrameSetup)) {
1897 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1898 << "currently\n");
1899 return false;
1900 }
1901
1902 for (auto &MOP : MI.operands()) {
  // Skip operands that are not registers overlapping RegToRename.
1903 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1904 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1905 continue;
1906 if (!canRenameMOP(MOP, TRI)) {
1907 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1908 return false;
1909 }
1910 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1911 }
1912
1913 return true;
1914 });
1915 return Success;
1916}
1917
1918// Check if we can find a physical register for renaming \p Reg. This register
1919// must:
1920// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1921// defined registers up to the point where the renamed register will be used,
1922// * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1923// registers in the range the rename register will be used,
1924// * is available in all used register classes (checked using RequiredClasses).
// Returns the chosen register, or std::nullopt if no suitable one exists.
1925static std::optional<MCPhysReg> tryToFindRegisterToRename(
1926 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1927 LiveRegUnits &UsedInBetween,
1929 const TargetRegisterInfo *TRI) {
// NOTE(review): the declaration of `RegInfo` (presumably
// `const MachineRegisterInfo &RegInfo = MF.getRegInfo();`) is not visible
// in this listing fragment — confirm against the full source.
1931
1932 // Checks if any sub- or super-register of PR is callee saved.
1933 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1934 return any_of(TRI->sub_and_superregs_inclusive(PR),
1935 [&MF, TRI](MCPhysReg SubOrSuper) {
1936 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1937 });
1938 };
1939
1940 // Check if PR or one of its sub- or super-registers can be used for all
1941 // required register classes.
1942 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1943 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1944 return any_of(
1945 TRI->sub_and_superregs_inclusive(PR),
1946 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1947 });
1948 };
1949
  // Scan candidates from Reg's minimal register class, taking the first one
  // that is unused, non-reserved, not callee-saved, and usable for every
  // required class.
1950 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1951 for (const MCPhysReg &PR : *RegClass) {
1952 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1953 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1954 CanBeUsedForAllClasses(PR)) {
  // Mark PR as defined so a later rename in the same block won't reuse it.
1955 DefinedInBB.addReg(PR);
1956 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1957 << "\n");
1958 return {PR};
1959 }
1960 }
1961 LLVM_DEBUG(dbgs() << "No rename register found from "
1962 << TRI->getRegClassName(RegClass) << "\n");
1963 return std::nullopt;
1964}
1965
1966// For store pairs: returns a register from FirstMI to the beginning of the
1967// block that can be renamed.
1968// For load pairs: returns a register from FirstMI to MI that can be renamed.
// Returns std::nullopt if renaming is not possible or not profitable here.
1969static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1970 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1971 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1973 const TargetRegisterInfo *TRI) {
1974 std::optional<MCPhysReg> RenameReg;
  // DebugCounter hook allows bisecting/disabling individual renaming
  // decisions from the command line.
1975 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1976 return RenameReg;
1977
  // Renaming requires accurate liveness information.
1978 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1979 MachineFunction &MF = *FirstMI.getParent()->getParent();
1980 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1981 return RenameReg;
1982
1983 const bool IsLoad = FirstMI.mayLoad();
1984
  // Compute (and cache via MaybeCanRename) whether renaming is legal at all;
  // loads and stores use different legality scans.
1985 if (!MaybeCanRename) {
1986 if (IsLoad)
1987 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1988 RequiredClasses, TRI)};
1989 else
1990 MaybeCanRename = {
1991 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1992 }
1993
  // If legal, try to actually pick a free physical register.
1994 if (*MaybeCanRename) {
1995 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1996 RequiredClasses, TRI);
1997 }
1998 return RenameReg;
1999}
2000
2001/// Scan the instructions looking for a load/store that can be combined with the
2002/// current instruction into a wider equivalent or a load/store pair.
/// Returns the iterator to the matched instruction (setting \p Flags to
/// describe how to merge), or the block's end() iterator when no match is
/// found within \p Limit non-transient instructions.
2004AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
2005 LdStPairFlags &Flags, unsigned Limit,
2006 bool FindNarrowMerge) {
2007 MachineBasicBlock::iterator E = I->getParent()->end();
// NOTE(review): the declaration of MBBI (presumably
// `MachineBasicBlock::iterator MBBI = I;`) is not visible in this listing
// fragment — confirm against the full source.
2009 MachineBasicBlock::iterator MBBIWithRenameReg;
2010 MachineInstr &FirstMI = *I;
2011 MBBI = next_nodbg(MBBI, E);
2012
2013 bool MayLoad = FirstMI.mayLoad();
2014 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
2015 Register Reg = getLdStRegOp(FirstMI).getReg();
// NOTE(review): declarations of BaseReg and Offset for FirstMI are not
// visible in this listing fragment — confirm against the full source.
2018 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
2019 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
2020
  // Lazily-computed cache of "can the destination register be renamed";
  // pre-seeded to false when renaming is disabled by the option.
2021 std::optional<bool> MaybeCanRename;
2022 if (!EnableRenaming)
2023 MaybeCanRename = {false};
2024
2025 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2026 LiveRegUnits UsedInBetween;
2027 UsedInBetween.init(*TRI);
2028
2029 Flags.clearRenameReg();
2030
2031 // Track which register units have been modified and used between the first
2032 // insn (inclusive) and the second insn.
2033 ModifiedRegUnits.clear();
2034 UsedRegUnits.clear();
2035
2036 // Remember any instructions that read/write memory between FirstMI and MI.
2037 SmallVector<MachineInstr *, 4> MemInsns;
2038
2039 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2040 for (unsigned Count = 0; MBBI != E && Count < Limit;
2041 MBBI = next_nodbg(MBBI, E)) {
2042 MachineInstr &MI = *MBBI;
2043 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2044
2045 UsedInBetween.accumulate(MI);
2046
2047 // Don't count transient instructions towards the search limit since there
2048 // may be different numbers of them if e.g. debug information is present.
2049 if (!MI.isTransient())
2050 ++Count;
2051
2052 Flags.setSExtIdx(-1);
2053 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2055 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2056 // If we've found another instruction with the same opcode, check to see
2057 // if the base and offset are compatible with our starting instruction.
2058 // These instructions all have scaled immediate operands, so we just
2059 // check for +1/-1. Make sure to check the new instruction offset is
2060 // actually an immediate and not a symbolic reference destined for
2061 // a relocation.
// NOTE(review): declarations of MIBaseReg and MIOffset are not visible in
// this listing fragment — confirm against the full source.
2064 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2065 if (IsUnscaled != MIIsUnscaled) {
2066 // We're trying to pair instructions that differ in how they are scaled.
2067 // If FirstMI is scaled then scale the offset of MI accordingly.
2068 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2069 int MemSize = TII->getMemScale(MI);
2070 if (MIIsUnscaled) {
2071 // If the unscaled offset isn't a multiple of the MemSize, we can't
2072 // pair the operations together: bail and keep looking.
2073 if (MIOffset % MemSize) {
2074 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2075 UsedRegUnits, TRI);
2076 MemInsns.push_back(&MI);
2077 continue;
2078 }
2079 MIOffset /= MemSize;
2080 } else {
2081 MIOffset *= MemSize;
2082 }
2083 }
2084
2085 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2086
2087 if (BaseReg == MIBaseReg) {
2088 // If the offset of the second ld/st is not equal to the size of the
2089 // destination register it can’t be paired with a pre-index ld/st
2090 // pair. Additionally if the base reg is used or modified the operations
2091 // can't be paired: bail and keep looking.
2092 if (IsPreLdSt) {
2093 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2094 bool IsBaseRegUsed = !UsedRegUnits.available(
2096 bool IsBaseRegModified = !ModifiedRegUnits.available(
2098 // If the stored value and the address of the second instruction is
2099 // the same, it needs to be using the updated register and therefore
2100 // it must not be folded.
2101 bool IsMIRegTheSame =
2102 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2104 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2105 IsMIRegTheSame) {
2106 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2107 UsedRegUnits, TRI);
2108 MemInsns.push_back(&MI);
2109 continue;
2110 }
2111 } else {
  // Non-pre-indexed candidates must be exactly adjacent (+/- one
  // access width) to be expressible as a pair.
2112 if ((Offset != MIOffset + OffsetStride) &&
2113 (Offset + OffsetStride != MIOffset)) {
2114 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2115 UsedRegUnits, TRI);
2116 MemInsns.push_back(&MI);
2117 continue;
2118 }
2119 }
2120
2121 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2122 if (FindNarrowMerge) {
2123 // If the alignment requirements of the scaled wide load/store
2124 // instruction can't express the offset of the scaled narrow input,
2125 // bail and keep looking. For promotable zero stores, allow only when
2126 // the stored value is the same (i.e., WZR).
2127 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2128 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2129 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2130 UsedRegUnits, TRI);
2131 MemInsns.push_back(&MI);
2132 continue;
2133 }
2134 } else {
2135 // Pairwise instructions have a 7-bit signed offset field. Single
2136 // insns have a 12-bit unsigned offset field. If the resultant
2137 // immediate offset of merging these instructions is out of range for
2138 // a pairwise instruction, bail and keep looking.
2139 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2140 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2141 UsedRegUnits, TRI);
2142 MemInsns.push_back(&MI);
2143 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2144 << "keep looking.\n");
2145 continue;
2146 }
2147 // If the alignment requirements of the paired (scaled) instruction
2148 // can't express the offset of the unscaled input, bail and keep
2149 // looking.
2150 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2151 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2152 UsedRegUnits, TRI);
2153 MemInsns.push_back(&MI);
2155 << "Offset doesn't fit due to alignment requirements, "
2156 << "keep looking.\n");
2157 continue;
2158 }
2159 }
2160
2161 // If the BaseReg has been modified, then we cannot do the optimization.
2162 // For example, in the following pattern
2163 // ldr x1 [x2]
2164 // ldr x2 [x3]
2165 // ldr x4 [x2, #8],
2166 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2167 if (!ModifiedRegUnits.available(BaseReg))
2168 return E;
2169
2170 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2172
2173 // If the Rt of the second instruction (destination register of the
2174 // load) was not modified or used between the two instructions and none
2175 // of the instructions between the second and first alias with the
2176 // second, we can combine the second into the first.
2177 bool RtNotModified =
2178 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2179 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2180 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2181
2182 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2183 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2184 << (RtNotModified ? "true" : "false") << "\n"
2185 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2186 << (RtNotUsed ? "true" : "false") << "\n");
2187
2188 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2189 // For pairs loading into the same reg, try to find a renaming
2190 // opportunity to allow the renaming of Reg between FirstMI and MI
2191 // and combine MI into FirstMI; otherwise bail and keep looking.
2192 if (SameLoadReg) {
2193 std::optional<MCPhysReg> RenameReg =
2194 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2195 Reg, DefinedInBB, UsedInBetween,
2196 RequiredClasses, TRI);
2197 if (!RenameReg) {
2198 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2199 UsedRegUnits, TRI);
2200 MemInsns.push_back(&MI);
2201 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2202 << "keep looking.\n");
2203 continue;
2204 }
2205 Flags.setRenameReg(*RenameReg);
2206 }
2207
2208 Flags.setMergeForward(false);
2209 if (!SameLoadReg)
2210 Flags.clearRenameReg();
2211 return MBBI;
2212 }
2213
2214 // Likewise, if the Rt of the first instruction is not modified or used
2215 // between the two instructions and none of the instructions between the
2216 // first and the second alias with the first, we can combine the first
2217 // into the second.
2218 RtNotModified = !(
2219 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2220
2221 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2222 << "Reg '" << getLdStRegOp(FirstMI)
2223 << "' not modified: "
2224 << (RtNotModified ? "true" : "false") << "\n");
2225
2226 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2227 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2228 Flags.setMergeForward(true);
2229 Flags.clearRenameReg();
2230 return MBBI;
2231 }
2232
  // FirstMI's register was clobbered in between: a merge-forward is
  // only possible if we can rename it; remember this candidate and
  // keep scanning for a better one.
2233 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2234 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2235 RequiredClasses, TRI);
2236 if (RenameReg) {
2237 Flags.setMergeForward(true);
2238 Flags.setRenameReg(*RenameReg);
2239 MBBIWithRenameReg = MBBI;
2240 }
2241 }
2242 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2243 << "interference in between, keep looking.\n");
2244 }
2245 }
2246
2247 if (Flags.getRenameReg())
2248 return MBBIWithRenameReg;
2249
2250 // If the instruction wasn't a matching load or store. Stop searching if we
2251 // encounter a call instruction that might modify memory.
2252 if (MI.isCall()) {
2253 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2254 return E;
2255 }
2256
2257 // Update modified / uses register units.
2258 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2259
2260 // Otherwise, if the base register is modified, we have no match, so
2261 // return early.
2262 if (!ModifiedRegUnits.available(BaseReg)) {
2263 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2264 return E;
2265 }
2266
2267 // Update list of instructions that read/write memory.
2268 if (MI.mayLoadOrStore())
2269 MemInsns.push_back(&MI);
2270 }
2271 return E;
2272}
2273
// NOTE(review): the enclosing signature (presumably
// `static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI,
//  MachineBasicBlock::iterator MaybeCFI)`) is not visible in this listing
// fragment — confirm against the full source. The body below returns
// MaybeCFI when it is a CFA-related CFI that belongs to an SP update in a
// frame-setup/destroy sequence, otherwise the block's end() iterator.
2276 assert((MI.getOpcode() == AArch64::SUBXri ||
2277 MI.getOpcode() == AArch64::ADDXri) &&
2278 "Expected a register update instruction");
2279 auto End = MI.getParent()->end();
  // Only relevant for SP updates carrying frame flags that are immediately
  // followed by a CFI instruction.
2280 if (MaybeCFI == End ||
2281 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2282 !(MI.getFlag(MachineInstr::FrameSetup) ||
2283 MI.getFlag(MachineInstr::FrameDestroy)) ||
2284 MI.getOperand(0).getReg() != AArch64::SP)
2285 return End;
2286
  // Look up the actual CFI directive and check it describes the CFA.
2287 const MachineFunction &MF = *MI.getParent()->getParent();
2288 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2289 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2290 switch (CFI.getOperation()) {
// NOTE(review): the case labels (presumably OpDefCfa / OpDefCfaOffset) are
// not visible in this listing fragment — confirm against the full source.
2293 return MaybeCFI;
2294 default:
2295 return End;
2296 }
2297}
2298
// Fold the add/sub \p Update of the base register into the load/store at \p I,
// producing a pre- or post-indexed instruction. Returns the iterator to the
// instruction after the merged one, or std::nullopt if CFI constraints
// prevent the merge.
2299std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2301 bool IsForward, bool IsPreIdx, bool MergeEither) {
2302 assert((Update->getOpcode() == AArch64::ADDXri ||
2303 Update->getOpcode() == AArch64::SUBXri) &&
2304 "Unexpected base register update instruction to merge!");
2305 MachineBasicBlock::iterator E = I->getParent()->end();
// NOTE(review): the declaration of NextI (presumably
// `MachineBasicBlock::iterator NextI = next_nodbg(I, E);`) is not visible in
// this listing fragment — confirm against the full source.
2307
2308 // If updating the SP and the following instruction is CFA offset related CFI,
2309 // make sure the CFI follows the SP update either by merging at the location
2310 // of the update or by moving the CFI after the merged instruction. If unable
2311 // to do so, bail.
2312 MachineBasicBlock::iterator InsertPt = I;
2313 if (IsForward) {
2314 assert(IsPreIdx);
2315 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2316 if (MergeEither) {
2317 InsertPt = Update;
2318 } else {
2319 // Take care not to reorder CFIs.
2320 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2321 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2322 }))
2323 return std::nullopt;
2324
2325 MachineBasicBlock *MBB = InsertPt->getParent();
2326 MBB->splice(std::next(InsertPt), MBB, CFI);
2327 }
2328 }
2329 }
2330
2331 // Return the instruction following the merged instruction, which is
2332 // the instruction following our unmerged load. Unless that's the add/sub
2333 // instruction we're merging, in which case it's the one after that.
2334 if (NextI == Update)
2335 NextI = next_nodbg(NextI, E);
2336
  // Normalize the update amount: SUBXri means a negative increment.
2337 int Value = Update->getOperand(2).getImm();
2338 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2339 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2340 if (Update->getOpcode() == AArch64::SUBXri)
2341 Value = -Value;
2342
2343 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2345 MachineInstrBuilder MIB;
2346 int Scale, MinOffset, MaxOffset;
2347 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2349 // Non-paired instruction.
2350 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2351 TII->get(NewOpc))
2352 .add(Update->getOperand(0))
2353 .add(getLdStRegOp(*I))
2355 .addImm(Value / Scale)
2356 .setMemRefs(I->memoperands())
2357 .setMIFlags(I->mergeFlagsWith(*Update));
2358 } else {
2359 // Paired instruction.
2360 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2361 TII->get(NewOpc))
2362 .add(Update->getOperand(0))
2363 .add(getLdStRegOp(*I, 0))
2364 .add(getLdStRegOp(*I, 1))
2366 .addImm(Value / Scale)
2367 .setMemRefs(I->memoperands())
2368 .setMIFlags(I->mergeFlagsWith(*Update));
2369 }
2370
  // Bookkeeping: statistics and debug output.
2371 if (IsPreIdx) {
2372 ++NumPreFolded;
2373 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2374 } else {
2375 ++NumPostFolded;
2376 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2377 }
2378 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2379 LLVM_DEBUG(I->print(dbgs()));
2380 LLVM_DEBUG(dbgs() << " ");
2381 LLVM_DEBUG(Update->print(dbgs()));
2382 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2383 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2384 LLVM_DEBUG(dbgs() << "\n");
2385
2386 // Erase the old instructions for the block.
2387 I->eraseFromParent();
2388 Update->eraseFromParent();
2389
2390 return NextI;
2391}
2392
// Fold a MOVZ+MOVK-materialized constant offset into the load/store at \p I:
// the high bits become an ADDXri computing a new base, and the low bits
// become the instruction's immediate. Returns the instruction after the
// replaced sequence.
2394AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2396 unsigned Offset, int Scale) {
2397 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2398 "Unexpected const mov instruction to merge!");
2399 MachineBasicBlock::iterator E = I->getParent()->end();
// NOTE(review): the declaration of NextI is not visible in this listing
// fragment — confirm against the full source.
2401 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2402 MachineInstr &MemMI = *I;
  // Split Offset: Low fits the scaled 12-bit immediate field, High goes
  // into the shifted-by-12 ADDXri immediate.
2403 unsigned Mask = (1 << 12) * Scale - 1;
2404 unsigned Low = Offset & Mask;
2405 unsigned High = Offset - Low;
// NOTE(review): declarations of BaseReg and IndexReg are not visible in this
// listing fragment — confirm against the full source.
2408 MachineInstrBuilder AddMIB, MemMIB;
2409
2410 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2411 AddMIB =
2412 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2413 .addDef(IndexReg)
2414 .addUse(BaseReg)
2415 .addImm(High >> 12) // shifted value
2416 .addImm(12); // shift 12
2417 (void)AddMIB;
2418 // Ld/St DestReg, IndexReg, Imm12
2419 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2420 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2421 .add(getLdStRegOp(MemMI))
2423 .addImm(Low / Scale)
2424 .setMemRefs(I->memoperands())
2425 .setMIFlags(I->mergeFlagsWith(*Update));
2426 (void)MemMIB;
2427
  // Bookkeeping: statistics and debug output.
2428 ++NumConstOffsetFolded;
2429 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2430 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2431 LLVM_DEBUG(PrevI->print(dbgs()));
2432 LLVM_DEBUG(dbgs() << " ");
2433 LLVM_DEBUG(Update->print(dbgs()));
2434 LLVM_DEBUG(dbgs() << " ");
2435 LLVM_DEBUG(I->print(dbgs()));
2436 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2437 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2438 LLVM_DEBUG(dbgs() << " ");
2439 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2440 LLVM_DEBUG(dbgs() << "\n");
2441
2442 // Erase the old instructions for the block.
  // PrevI is the MOVZWi that materialized the low half of the constant.
2443 I->eraseFromParent();
2444 PrevI->eraseFromParent();
2445 Update->eraseFromParent();
2446
2447 return NextI;
2448}
2449
2450bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2451 MachineInstr &MI,
2452 unsigned BaseReg, int Offset) {
2453 switch (MI.getOpcode()) {
2454 default:
2455 break;
2456 case AArch64::SUBXri:
2457 case AArch64::ADDXri:
2458 // Make sure it's a vanilla immediate operand, not a relocation or
2459 // anything else we can't handle.
2460 if (!MI.getOperand(2).isImm())
2461 break;
2462 // Watch out for 1 << 12 shifted value.
2463 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2464 break;
2465
2466 // The update instruction source and destination register must be the
2467 // same as the load/store base register.
2468 if (MI.getOperand(0).getReg() != BaseReg ||
2469 MI.getOperand(1).getReg() != BaseReg)
2470 break;
2471
2472 int UpdateOffset = MI.getOperand(2).getImm();
2473 if (MI.getOpcode() == AArch64::SUBXri)
2474 UpdateOffset = -UpdateOffset;
2475
2476 // The immediate must be a multiple of the scaling factor of the pre/post
2477 // indexed instruction.
2478 int Scale, MinOffset, MaxOffset;
2479 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2480 if (UpdateOffset % Scale != 0)
2481 break;
2482
2483 // Scaled offset must fit in the instruction immediate.
2484 int ScaledOffset = UpdateOffset / Scale;
2485 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2486 break;
2487
2488 // If we have a non-zero Offset, we check that it matches the amount
2489 // we're adding to the register.
2490 if (!Offset || Offset == UpdateOffset)
2491 return true;
2492 break;
2493 }
2494 return false;
2495}
2496
// Return true if \p MI is the MOVKWi half of a MOVZWi+MOVKWi pair that
// materializes \p IndexReg with a constant small enough (< 1 << 24) to be
// folded; on success \p Offset receives the combined constant.
2497bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2498 MachineInstr &MI,
2499 unsigned IndexReg,
2500 unsigned &Offset) {
2501 // The update instruction source and destination register must be the
2502 // same as the load/store index register.
2503 if (MI.getOpcode() == AArch64::MOVKWi &&
2504 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2505
2506 // movz + movk hold a large offset of a Ld/St instruction.
2507 MachineBasicBlock::iterator B = MI.getParent()->begin();
// NOTE(review): the declaration of MBBI (presumably
// `MachineBasicBlock::iterator MBBI = MI.getIterator();`) is not visible in
// this listing fragment — confirm against the full source.
2509 // Skip the scene when the MI is the first instruction of a block.
2510 if (MBBI == B)
2511 return false;
  // The MOVZWi is expected to be the immediately preceding (non-debug)
  // instruction.
2512 MBBI = prev_nodbg(MBBI, B);
2513 MachineInstr &MovzMI = *MBBI;
2514 // Make sure the MOVKWi and MOVZWi set the same register.
2515 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2516 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
  // Combine the low half (MOVZ imm) with the shifted high half (MOVK).
2517 unsigned Low = MovzMI.getOperand(1).getImm();
2518 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2519 Offset = High + Low;
2520 // 12-bit optionally shifted immediates are legal for adds.
2521 return Offset >> 24 == 0;
2522 }
2523 }
2524 return false;
2525}
2526
// Scan forward from the load/store at \p I for an add/sub of its base
// register that can be folded as a post-index update of \p UnscaledOffset
// bytes. May follow single-predecessor successor blocks when liveness is
// tracked. Returns the update instruction, or I's block end() if none.
2527MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2528 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2529 MachineBasicBlock::iterator E = I->getParent()->end();
2530 MachineInstr &MemMI = *I;
// NOTE(review): declarations of MBBI and BaseReg are not visible in this
// listing fragment — confirm against the full source.
2532
2534 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2535 TII->getMemScale(MemMI);
2536
2537 // Scan forward looking for post-index opportunities. Updating instructions
2538 // can't be formed if the memory instruction doesn't have the offset we're
2539 // looking for.
2540 if (MIUnscaledOffset != UnscaledOffset)
2541 return E;
2542
2543 // If the base register overlaps a source/destination register, we can't
2544 // merge the update. This does not apply to tag store instructions which
2545 // ignore the address part of the source register.
2546 // This does not apply to STGPi as well, which does not have unpredictable
2547 // behavior in this case unlike normal stores, and always performs writeback
2548 // after reading the source register value.
2549 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2550 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2551 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2552 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2553 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2554 return E;
2555 }
2556 }
2557
2558 // Track which register units have been modified and used between the first
2559 // insn (inclusive) and the second insn.
2560 ModifiedRegUnits.clear();
2561 UsedRegUnits.clear();
2562 MBBI = next_nodbg(MBBI, E);
2563
2564 // We can't post-increment the stack pointer if any instruction between
2565 // the memory access (I) and the increment (MBBI) can access the memory
2566 // region defined by [SP, MBBI].
2567 const bool BaseRegSP = BaseReg == AArch64::SP;
2568 if (BaseRegSP && needsWinCFI(I->getMF())) {
2569 // FIXME: For now, we always block the optimization over SP in windows
2570 // targets as it requires to adjust the unwind/debug info, messing up
2571 // the unwind info can actually cause a miscompile.
2572 return E;
2573 }
2574
2575 unsigned Count = 0;
2576 MachineBasicBlock *CurMBB = I->getParent();
2577 // choice of next block to visit is liveins-based
2578 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2579
  // Outer loop: scan the current block; when it is exhausted, possibly
  // descend into a unique successor where BaseReg is still live.
2580 while (true) {
2581 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2582 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2583 MachineInstr &MI = *MBBI;
2584
2585 // Don't count transient instructions towards the search limit since there
2586 // may be different numbers of them if e.g. debug information is present.
2587 if (!MI.isTransient())
2588 ++Count;
2589
2590 // If we found a match, return it.
2591 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2592 return MBBI;
2593
2594 // Update the status of what the instruction clobbered and used.
2595 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2596 TRI);
2597
2598 // Otherwise, if the base register is used or modified, we have no match,
2599 // so return early. If we are optimizing SP, do not allow instructions
2600 // that may load or store in between the load and the optimized value
2601 // update.
2602 if (!ModifiedRegUnits.available(BaseReg) ||
2603 !UsedRegUnits.available(BaseReg) ||
2604 (BaseRegSP && MBBI->mayLoadOrStore()))
2605 return E;
2606 }
2607
2608 if (!VisitSucc || Limit <= Count)
2609 break;
2610
2611 // Try to go downward to successors along a CF path w/o side enters
2612 // such that BaseReg is alive along it but not at its exits
2613 MachineBasicBlock *SuccToVisit = nullptr;
2614 unsigned LiveSuccCount = 0;
2615 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2616 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2617 if (Succ->isLiveIn(*AI)) {
  // BaseReg live into more than one successor: ambiguous, give up.
2618 if (LiveSuccCount++)
2619 return E;
  // Only follow successors with a single predecessor (no side entries).
2620 if (Succ->pred_size() == 1)
2621 SuccToVisit = Succ;
2622 break;
2623 }
2624 }
2625 }
2626 if (!SuccToVisit)
2627 break;
2628 CurMBB = SuccToVisit;
2629 MBBI = CurMBB->begin();
2630 }
2631
2632 return E;
2633}
2634
// Scan backward from the load/store at \p I for an add/sub of its base
// register that can be folded as a pre-index update. Sets \p MergeEither to
// indicate whether the merged instruction may be placed at either location
// (at the update or at the memory access). Returns the update instruction,
// or the block end() if none is found within \p Limit instructions.
2635MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2636 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2637 MachineBasicBlock::iterator B = I->getParent()->begin();
2638 MachineBasicBlock::iterator E = I->getParent()->end();
2639 MachineInstr &MemMI = *I;
// NOTE(review): declarations of MBBI, BaseReg and Offset are not visible in
// this listing fragment — confirm against the full source.
2641 MachineFunction &MF = *MemMI.getMF();
2642
2645
2646 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2647 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2648 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2649 : AArch64::NoRegister};
2650
2651 // If the load/store is the first instruction in the block, there's obviously
2652 // not any matching update. Ditto if the memory offset isn't zero.
2653 if (MBBI == B || Offset != 0)
2654 return E;
2655 // If the base register overlaps a destination register, we can't
2656 // merge the update.
2657 if (!isTagStore(MemMI)) {
2658 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2659 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2660 return E;
2661 }
2662
2663 const bool BaseRegSP = BaseReg == AArch64::SP;
2664 if (BaseRegSP && needsWinCFI(I->getMF())) {
2665 // FIXME: For now, we always block the optimization over SP in windows
2666 // targets as it requires to adjust the unwind/debug info, messing up
2667 // the unwind info can actually cause a miscompile.
2668 return E;
2669 }
2670
  // The red zone bounds how far below SP memory may legitimately be
  // accessed; used below to validate SP pre-increment folding.
2671 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2672 unsigned RedZoneSize =
2673 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2674
2675 // Track which register units have been modified and used between the first
2676 // insn (inclusive) and the second insn.
2677 ModifiedRegUnits.clear();
2678 UsedRegUnits.clear();
2679 unsigned Count = 0;
2680 bool MemAccessBeforeSPPreInc = false;
2681 MergeEither = true;
2682 do {
2683 MBBI = prev_nodbg(MBBI, B);
2684 MachineInstr &MI = *MBBI;
2685
2686 // Don't count transient instructions towards the search limit since there
2687 // may be different numbers of them if e.g. debug information is present.
2688 if (!MI.isTransient())
2689 ++Count;
2690
2691 // If we found a match, return it.
2692 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2693 // Check that the update value is within our red zone limit (which may be
2694 // zero).
2695 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2696 return E;
2697 return MBBI;
2698 }
2699
2700 // Update the status of what the instruction clobbered and used.
2701 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2702
2703 // Otherwise, if the base register is used or modified, we have no match, so
2704 // return early.
2705 if (!ModifiedRegUnits.available(BaseReg) ||
2706 !UsedRegUnits.available(BaseReg))
2707 return E;
2708
2709 // If we have a destination register (i.e. a load instruction) and a
2710 // destination register is used or modified, then we can only merge forward,
2711 // i.e. the combined instruction is put in the place of the memory
2712 // instruction. Same applies if we see a memory access or side effects.
2713 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2714 (DestReg[0] != AArch64::NoRegister &&
2715 !(ModifiedRegUnits.available(DestReg[0]) &&
2716 UsedRegUnits.available(DestReg[0]))) ||
2717 (DestReg[1] != AArch64::NoRegister &&
2718 !(ModifiedRegUnits.available(DestReg[1]) &&
2719 UsedRegUnits.available(DestReg[1]))))
2720 MergeEither = false;
2721
2722 // Keep track if we have a memory access before an SP pre-increment, in this
2723 // case we need to validate later that the update amount respects the red
2724 // zone.
2725 if (BaseRegSP && MBBI->mayLoadOrStore())
2726 MemAccessBeforeSPPreInc = true;
2727 } while (MBBI != B && Count < Limit);
2728 return E;
2729}
2730
// Scan backwards from the register-offset load/store at I for the MOV that
// materialized the constant held in its index register.  The index register
// must be killed at I and carry no shift amount.  On a match, returns an
// iterator to the MOV and Offset receives its constant; otherwise returns the
// block's end() iterator.
// NOTE(review): this is a rendered listing; a few statement lines (the
// MBBI/IndexReg initializations and part of the guard condition) are elided
// by the renderer — confirm against the original source.
2732AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2733 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2734 MachineBasicBlock::iterator B = I->getParent()->begin();
2735 MachineBasicBlock::iterator E = I->getParent()->end();
2736 MachineInstr &MemMI = *I;
2738
2739 // If the load is the first instruction in the block, there's obviously
2740 // not any matching load or store.
2741 if (MBBI == B)
2742 return E;
2743
2744 // Make sure the IndexReg is killed and the shift amount is zero.
2745 // TODO: Relax this restriction to allow extends; keep processing simple
2745 // for now.
2746 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2747 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2749 return E;
2750
2752
2753 // Track which register units have been modified and used between the first
2754 // insn (inclusive) and the second insn.
2755 ModifiedRegUnits.clear();
2756 UsedRegUnits.clear();
2757 unsigned Count = 0;
2758 do {
2759 MBBI = prev_nodbg(MBBI, B);
2760 MachineInstr &MI = *MBBI;
2761
2762 // Don't count transient instructions towards the search limit since there
2763 // may be different numbers of them if e.g. debug information is present.
2764 if (!MI.isTransient())
2765 ++Count;
2766
2767 // If we found a match, return it.
2768 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2769 return MBBI;
2770 }
2771
2772 // Update the status of what the instruction clobbered and used.
2773 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2774
2775 // Otherwise, if the index register is used or modified, we have no match,
2776 // so return early.
2777 if (!ModifiedRegUnits.available(IndexReg) ||
2778 !UsedRegUnits.available(IndexReg))
2779 return E;
2780
2781 } while (MBBI != B && Count < Limit);
2782 return E;
2783}
2784
// Try to replace the load at MBBI with a value forwarded from an earlier
// store to the same address (optimizeBlock step 1).  If the store is wider
// than the load, the forwarded value becomes a bitfield extract.  Returns
// true and repositions MBBI past the rewritten code on success.
2785bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2787 MachineInstr &MI = *MBBI;
2788 // If this is a volatile load, don't mess with it.
2789 if (MI.hasOrderedMemoryRef())
2790 return false;
2791
2792 // Don't touch epilogue (FrameDestroy) instructions when Windows unwind
2792 // info must stay accurate; reordering them would invalidate it.
2792 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2793 return false;
2794
2795 // Make sure this is a reg+imm.
2796 // FIXME: It is possible to extend it to handle reg+reg cases.
2798 return false;
2799
2800 // Look backward up to LdStLimit instructions.
2802 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2803 ++NumLoadsFromStoresPromoted;
2804 // Promote the load. Keeping the iterator straight is a
2805 // pain, so we let the merge routine tell us what the next instruction
2806 // is after it's done mucking about.
2807 MBBI = promoteLoadFromStore(MBBI, StoreI);
2808 return true;
2809 }
2810 return false;
2811}
2812
2813// Merge adjacent zero stores into a wider store.
// E.g. two "strh wzr" at consecutive offsets become one "str wzr".
// Returns true and repositions MBBI past the merged store on success.
2814bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2816 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2817 MachineInstr &MI = *MBBI;
2818 MachineBasicBlock::iterator E = MI.getParent()->end();
2819
2820 if (!TII->isCandidateToMergeOrPair(MI))
2821 return false;
2822
2823 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2824 LdStPairFlags Flags;
2826 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2827 if (MergeMI != E) {
2828 ++NumZeroStoresPromoted;
2829
2830 // Keeping the iterator straight is a pain, so we let the merge routine tell
2831 // us what the next instruction is after it's done mucking about.
2832 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2833 return true;
2834 }
2835 return false;
2836}
2837
2838// Find loads and stores that can be merged into a single load or store pair
2839// instruction.
// Returns true if a pair (LDP/STP) was formed; MBBI is then repositioned to
// the instruction following the merged pair.
2840bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2841 MachineInstr &MI = *MBBI;
2842 MachineBasicBlock::iterator E = MI.getParent()->end();
2843
2844 if (!TII->isCandidateToMergeOrPair(MI))
2845 return false;
2846
2847 // If disable-ldp feature is opted, do not emit ldp.
2848 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2849 return false;
2850
2851 // If disable-stp feature is opted, do not emit stp.
2852 if (MI.mayStore() && Subtarget->hasDisableStp())
2853 return false;
2854
2855 // Early exit if the offset is not possible to match. (6 bits of positive
2856 // range, plus allow an extra one in case we find a later insn that matches
2857 // with Offset-1)
2858 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2860 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2861 // Allow one more for offset.
2862 if (Offset > 0)
2863 Offset -= OffsetStride;
2864 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2865 return false;
2866
2867 // Look ahead up to LdStLimit instructions for a pairable instruction.
2868 LdStPairFlags Flags;
2870 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2871 if (Paired != E) {
2872 // Keeping the iterator straight is a pain, so we let the merge routine tell
2873 // us what the next instruction is after it's done mucking about.
2874 auto Prev = std::prev(MBBI);
2875
2876 // Fetch the memoperand of the load/store that is a candidate for
2877 // combination.
2878 MachineMemOperand *MemOp =
2879 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2880
2881 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2882 // that the alignment of the source pointer is at least double the alignment
2883 // of the type.
2884 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2885 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2886 // If there is no size/align information, cancel the transformation.
2887 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2888 NumFailedAlignmentCheck++;
2889 return false;
2890 }
2891
2892 // Get the needed alignments to check them if
2893 // ldp-aligned-only/stp-aligned-only features are opted.
2894 uint64_t MemAlignment = MemOp->getAlign().value();
2895 uint64_t TypeAlignment =
2896 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2897
2898 if (MemAlignment < 2 * TypeAlignment) {
2899 NumFailedAlignmentCheck++;
2900 return false;
2901 }
2902 }
2903
2904 ++NumPairCreated;
2905 if (TII->hasUnscaledLdStOffset(MI))
2906 ++NumUnscaledPairCreated;
2907
2908 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2909 // Collect liveness info for instructions between Prev and the new position
2910 // MBBI.
2911 for (auto I = std::next(Prev); I != MBBI; I++)
2912 updateDefinedRegisters(*I, DefinedInBB, TRI);
2913
2914 return true;
2915 }
2916 return false;
2917}
2918
// Try to fold a base-register add/sub into the load/store at MBBI, forming a
// pre- or post-indexed access.  Three patterns are tried in order:
//   1. a following update folded as a post-index,
//   2. a preceding update folded as a pre-index,
//   3. a following update whose amount equals the access offset, folded as
//      a pre-index.
// Returns true and repositions MBBI on success.
2919bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2921 MachineInstr &MI = *MBBI;
2922 MachineBasicBlock::iterator E = MI.getParent()->end();
2924
2925 // Look forward to try to form a post-index instruction. For example,
2926 // ldr x0, [x20]
2927 // add x20, x20, #32
2928 // merged into:
2929 // ldr x0, [x20], #32
2930 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2931 if (Update != E) {
2932 // Merge the update into the ld/st.
2933 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2934 /*IsPreIdx=*/false,
2935 /*MergeEither=*/false)) {
2936 MBBI = *NextI;
2937 return true;
2938 }
2939 }
2940
2941 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2942 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2943 return false;
2944
2945 // Look back to try to find a pre-index instruction. For example,
2946 // add x0, x0, #8
2947 // ldr x1, [x0]
2948 // merged into:
2949 // ldr x1, [x0, #8]!
2950 bool MergeEither;
2951 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2952 if (Update != E) {
2953 // Merge the update into the ld/st.
2954 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2955 /*IsPreIdx=*/true, MergeEither)) {
2956 MBBI = *NextI;
2957 return true;
2958 }
2959 }
2960
2961 // The immediate in the load/store is scaled by the size of the memory
2962 // operation. The immediate in the add we're looking for,
2963 // however, is not, so adjust here.
2964 int UnscaledOffset =
2966
2967 // Look forward to try to find a pre-index instruction. For example,
2968 // ldr x1, [x0, #64]
2969 // add x0, x0, #64
2970 // merged into:
2971 // ldr x1, [x0, #64]!
2972 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2973 if (Update != E) {
2974 // Merge the update into the ld/st.
2975 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2976 /*IsPreIdx=*/true,
2977 /*MergeEither=*/false)) {
2978 MBBI = *NextI;
2979 return true;
2980 }
2981 }
2982
2983 return false;
2984}
2985
// Try to rewrite a register-offset load/store whose index register was
// materialized by a MOV of a large constant into an ADD of the high part
// plus an immediate-offset access (see the example below).  Scale is the
// access size used to validate that the immediate is encodable.  Returns
// true and repositions MBBI on success.
2986bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2987 int Scale) {
2988 MachineInstr &MI = *MBBI;
2989 MachineBasicBlock::iterator E = MI.getParent()->end();
2991
2992 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2993 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2994 return false;
2995
2996 // Look back to try to find a const offset for index LdSt instruction. For
2997 // example,
2998 // mov x8, #LargeImm ; = a * (1<<12) + imm12
2999 // ldr x1, [x0, x8]
3000 // merged into:
3001 // add x8, x0, a * (1<<12)
3002 // ldr x1, [x8, imm12]
3003 unsigned Offset;
3004 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
 // The folded immediate must be a multiple of the access size (Scale) so it
 // can be encoded as a scaled imm12 offset.
3005 if (Update != E && (Offset & (Scale - 1)) == 0) {
3006 // Merge the imm12 into the ld/st.
3007 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
3008 return true;
3009 }
3010
3011 return false;
3012}
3013
// Run all five peephole transformations over MBB, each pass iterating the
// whole block so later steps see the output of earlier ones.  Returns true
// if anything changed.  EnableNarrowZeroStOpt gates step 2 (widening zero
// stores), which the caller disables under strict alignment.
3014bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
3015 bool EnableNarrowZeroStOpt) {
3016 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
3017
3018 bool Modified = false;
3019 // Four transformations to do here:
3020 // 1) Find loads that directly read from stores and promote them by
3021 // replacing with mov instructions. If the store is wider than the load,
3022 // the load will be replaced with a bitfield extract.
3023 // e.g.,
3024 // str w1, [x0, #4]
3025 // ldrh w2, [x0, #6]
3026 // ; becomes
3027 // str w1, [x0, #4]
3028 // lsr w2, w1, #16
3030 MBBI != E;) {
3031 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3032 Modified = true;
3033 else
3034 ++MBBI;
3035 }
3036 // 2) Merge adjacent zero stores into a wider store.
3037 // e.g.,
3038 // strh wzr, [x0]
3039 // strh wzr, [x0, #2]
3040 // ; becomes
3041 // str wzr, [x0]
3042 // e.g.,
3043 // str wzr, [x0]
3044 // str wzr, [x0, #4]
3045 // ; becomes
3046 // str xzr, [x0]
3047 if (EnableNarrowZeroStOpt)
3049 MBBI != E;) {
3050 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3051 Modified = true;
3052 else
3053 ++MBBI;
3054 }
3055 // 3) Find loads and stores that can be merged into a single load or store
3056 // pair instruction.
3057 // When compiling for SVE 128, also try to combine SVE fill/spill
3058 // instructions into LDP/STP.
3059 // e.g.,
3060 // ldr x0, [x2]
3061 // ldr x1, [x2, #8]
3062 // ; becomes
3063 // ldp x0, x1, [x2]
3064 // e.g.,
3065 // ldr z0, [x2]
3066 // ldr z1, [x2, #1, mul vl]
3067 // ; becomes
3068 // ldp q0, q1, [x2]
3069
 // Seed DefinedInBB with the block live-ins so rename-register searches in
 // step 3 see accurate liveness.  (The guarding condition line is elided in
 // this rendering — confirm against the original source.)
3071 DefinedInBB.clear();
3072 DefinedInBB.addLiveIns(MBB);
3073 }
3074
3076 MBBI != E;) {
3077 // Track currently live registers up to this point, to help with
3078 // searching for a rename register on demand.
3079 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3080 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3081 Modified = true;
3082 else
3083 ++MBBI;
3084 }
3085 // 4) Find base register updates that can be merged into the load or store
3086 // as a base-reg writeback.
3087 // e.g.,
3088 // ldr x0, [x2]
3089 // add x2, x2, #4
3090 // ; becomes
3091 // ldr x0, [x2], #4
3093 MBBI != E;) {
3094 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3095 Modified = true;
3096 else
3097 ++MBBI;
3098 }
3099
3100 // 5) Find a register assigned with a const value that can be combined with
3101 // into the load or store. e.g.,
3102 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3103 // ldr x1, [x0, x8]
3104 // ; becomes
3105 // add x8, x0, a * (1<<12)
3106 // ldr x1, [x8, imm12]
3108 MBBI != E;) {
3109 int Scale;
3110 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3111 Modified = true;
3112 else
3113 ++MBBI;
3114 }
3115
3116 return Modified;
3117}
3118
3119bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3120 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3121 TII = Subtarget->getInstrInfo();
3122 TRI = Subtarget->getRegisterInfo();
3123
3124 // Resize the modified and used register unit trackers. We do this once
3125 // per function and then clear the register units each time we optimize a load
3126 // or store.
3127 ModifiedRegUnits.init(*TRI);
3128 UsedRegUnits.init(*TRI);
3129 DefinedInBB.init(*TRI);
3130
3131 bool Modified = false;
3132 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3133 for (auto &MBB : Fn) {
3134 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3135 Modified |= M;
3136 }
3137
3138 return Modified;
3139}
3140
3141// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3142// stores near one another? Note: The pre-RA instruction scheduler already has
3143// hooks to try and schedule pairable loads/stores together to improve pairing
3144// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3145
3146// FIXME: When pairing store instructions it's very possible for this pass to
3147// hoist a store with a KILL marker above another use (without a KILL marker).
3148// The resulting IR is invalid, but nothing uses the KILL markers after this
3149// pass, so it's never caused a problem in practice.
3150
3151bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3152 if (skipFunction(MF.getFunction()))
3153 return false;
3154 AArch64LoadStoreOpt Impl;
3155 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3156 return Impl.runOnMachineFunction(MF);
3157}
3158
3159/// createAArch64LoadStoreOptLegacyPass - returns an instance of the
3160/// load / store optimization pass for the legacy pass manager.
3162 return new AArch64LoadStoreOptLegacy();
3163}
3164
// New pass-manager entry point: obtain alias analysis via the analysis
// manager, run the shared implementation, and report which analyses survive.
// NOTE(review): the signature and the PreservedAnalyses construction lines
// are elided in this rendering — confirm against the original source.
3168 AArch64LoadStoreOpt Impl;
3170 .getManager()
3171 .getResult<AAManager>(MF.getFunction());
3172 bool Changed = Impl.runOnMachineFunction(MF);
 // Nothing changed, so every analysis result remains valid.
3173 if (!Changed)
3174 return PreservedAnalyses::all();
3177 return PA;
3178}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
Definition MCAsmInfo.h:655
OpType getOperation() const
Definition MCDwarf.h:714
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.