#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck,
          "Number of load/store pair transformations that did not pass "
          "the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of constant offsets of index addresses folded");

static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit",
                                     cl::init(100), cl::Hidden);
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
                                        cl::init(10), cl::Hidden);
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
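// LdStPairFlags carries the result of the candidate search back to the
// rewriting code: MergeForward selects which of the two candidate
// instructions the merged instruction replaces, SExtIdx records which result
// of a created load pair still needs a sign-extend, and RenameReg optionally
// names a register that can be renamed to break a dependence between the two
// candidates.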
using LdStPairFlags = struct LdStPairFlags {
  bool MergeForward = false;

  int SExtIdx = -1;

  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt {
                          LdStPairFlags &Flags,
                          bool FindNarrowMerge);
                          const LdStPairFlags &Flags);
                          const LdStPairFlags &Flags);
                          int UnscaledOffset, unsigned Limit);
                            unsigned BaseReg, int Offset);
                              unsigned IndexReg, unsigned &Offset);
  std::optional<MachineBasicBlock::iterator>
                  bool IsPreIdx, bool MergeEither);
char AArch64LoadStoreOptLegacy::ID = 0;
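// The pass performs four related rewrites over each basic block: combining
// two adjacent loads or stores into an LDP/STP pair, widening adjacent
// narrow zero stores, promoting a load whose bytes were just written by a
// covering store, and folding base-register add/sub updates (and
// MOVZ/MOVK-built constant offsets) into pre-/post-indexed or base+offset
// addressing forms.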
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AArch64::STZ2Gi:
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
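// Illustrative example (not taken verbatim from this file) of the zero-store
// widening this table drives:
//   strb wzr, [x0]        ; STRBBui
//   strb wzr, [x0, #1]    ; STRBBui
//     ; becomes
//   strh wzr, [x0]        ; STRHHui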
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
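// The core pairing transformation driven by this table, for example:
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
//     ; becomes
//   ldp x0, x1, [x2]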
static bool isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst) {
  unsigned StOpc = StoreInst.getOpcode();
  switch (LoadInst.getOpcode()) {
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
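// Store-to-load forwarding is only attempted when an earlier store of equal
// or greater width writes every byte the load reads; the offset/size
// containment test for that is isLdOffsetInRangeOfSt further below.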
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRBui:
    return AArch64::STRBpre;
  case AArch64::STRHui:
    return AArch64::STRHpre;
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRBui:
    return AArch64::LDRBpre;
  case AArch64::LDRHui:
    return AArch64::LDRHpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
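// Pre-index folding replaces a separate base update plus access with one
// write-back form, e.g. (illustrative):
//   add x0, x0, #16
//   str x1, [x0]
//     ; becomes
//   str x1, [x0, #16]!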
static unsigned getBaseAddressOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRBui:
    return AArch64::STRBpost;
  case AArch64::STRHui:
    return AArch64::STRHpost;
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRBui:
    return AArch64::LDRBpost;
  case AArch64::LDRHui:
    return AArch64::LDRHpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
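// Post-index folding absorbs a following base update, e.g. (illustrative):
//   ldr x1, [x0]
//   add x0, x0, #8
//     ; becomes
//   ldr x1, [x0], #8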
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  return MI.getOperand(Idx);
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  int UnscaledStOffset =
  int UnscaledLdOffset =
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
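// Worked example of the containment check above (illustrative): an 8-byte
// store at unscaled offset 16 covers a 4-byte load at unscaled offset 20,
// since 16 <= 20 and 20 + 4 <= 16 + 8.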
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STZ2Gi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDPSWi:
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:

  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:

  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:

  case AArch64::LDRDroX:
  case AArch64::LDRXroX:

  case AArch64::LDRQroX:

  case AArch64::ORRWrs:
  case AArch64::ADDWri:
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
         "Expected promotable zero stores.");

  if (NextI == MergeMI)

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();

  const MachineOperand &BaseRegOp =
                       : AArch64InstrInfo::getLdStBaseOp(*I);

  int64_t IOffsetInBytes =
  int64_t MIOffsetInBytes =

  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
    int NewOffsetStride = TII->getMemScale(NewOpcode);
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  MachineInstrBuilder MIB;
          .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
          .setMIFlags(I->mergeFlagsWith(*MergeMI));

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");

  I->eraseFromParent();
  MergeMI->eraseFromParent();
  auto MBB = MI.getParent();

    return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
           TRI->regsOverlap(MOP.getReg(), DefReg);

    if (MOP.isReg() && MOP.isKill())
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());

                                            unsigned InstrNumToSet,
  unsigned OperandNo = 0;
  bool RegFound = false;
  for (const auto Op : MergedInstr.operands()) {
    if (Op.getReg() == Reg) {
        {InstrNumToSet, OperandNo});
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  if (NextI == Paired)

  int SExtIdx = Flags.getSExtIdx();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
    DefinedInBB.addReg(*RenameReg);

    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
               TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
          bool SeenDef = false;
            MachineOperand &MOP = MI.getOperand(OpIdx);
                (!MergeForward || !SeenDef ||
                TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                     "Need renamable operands");
              if (const TargetRegisterClass *RC =
                MatchingReg = GetMatchingSubReg(RC);
                MatchingReg = GetMatchingSubReg(
                    TRI->getMinimalPhysRegClass(MOP.getReg()));
            MachineOperand &MOP = MI.getOperand(OpIdx);
                TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                     "Need renamable operands");
              if (const TargetRegisterClass *RC =
                MatchingReg = GetMatchingSubReg(RC);
                MatchingReg = GetMatchingSubReg(
                    TRI->getMinimalPhysRegClass(MOP.getReg()));
              assert(MatchingReg != AArch64::NoRegister &&
                     "Cannot find matching regs for renaming");
                       TRI, UINT32_MAX, UpdateMIs);

      RegToCheck = RegToRename;
                       MergeForward ? std::next(I) : I,
                       MergeForward ? std::next(Paired) : Paired))
                  [this, RegToCheck](const MachineOperand &MOP) {
                    return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                           !TRI->regsOverlap(MOP.getReg(), RegToCheck);
           "Rename register used between paired instruction, trashing the "

  const MachineOperand &BaseRegOp =
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
      PairedOffset *= MemSize;

  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      SExtIdx = (SExtIdx + 1) % 2;

  assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
         "Unscaled offset cannot be scaled.");
  OffsetImm /= TII->getMemScale(*RtMI);

  MachineInstrBuilder MIB;
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))

    for (MachineInstr &MI :
         make_range(std::next(I->getIterator()), Paired->getIterator()))

        .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n ");

  if (SExtIdx != -1) {
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegX, RegState::Define);
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)

    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == I->getOperand(0).getReg()) {
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == Paired->getOperand(0).getReg()) {
  } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
           AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
    MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
    MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));

    if (I->peekDebugInstrNum()) {
    if (Paired->peekDebugInstrNum()) {

    SmallSetVector<Register, 4> Ops;
    for (const MachineOperand &MO :
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (const MachineOperand &MO :
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
      MIB.addDef(Op, RegState::Implicit);

    CopyImplicitOps(I, Paired);

  I->eraseFromParent();
  Paired->eraseFromParent();
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  bool IsStoreXReg =
      TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
      LoadI->eraseFromParent();

        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)

    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
    int UnscaledStOffset =
    int Width = LoadSize * 8;
            LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)

          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
    } else if (IsStoreXReg && Imms == 31) {
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))

          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),

  for (MachineInstr &MI : make_range(StoreI->getIterator(),
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);

  LoadI->eraseFromParent();
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  if (Offset % OffsetStride)

static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
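// For example, alignTo(7, 4) == 8 and alignTo(8, 4) == 8; the pairing code
// uses this rounding to reject offsets that are not a multiple of the
// required stride.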
bool AArch64LoadStoreOpt::findMatchingStore(
  MachineInstr &LoadMI = *I;

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

    MachineInstr &MI = *MBBI;

    if (!MI.isTransient())

    if (!ModifiedRegUnits.available(BaseReg))
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))

         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcB = MI.getOpcode();

  if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
      OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)

  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");

  Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);

  if (!PairIsValidLdStrOpc)

  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))

         TII->getMemScale(FirstMI) == TII->getMemScale(MI);

  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
          << "  Cannot rename operands with multiple disjunct subregisters ("
    return TRI->isSuperOrSubRegisterEq(

    return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
           MOP.isImplicit() && MOP.isKill() &&
           TRI->regsOverlap(RegToRename, MOP.getReg());
  bool FoundDef = false;

    if (MI.isPseudo()) {
      LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");

    for (auto &MOP : MI.operands()) {
          !TRI->regsOverlap(MOP.getReg(), RegToRename))

    for (auto &MOP : MI.operands()) {
          !TRI->regsOverlap(MOP.getReg(), RegToRename))

    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    LLVM_DEBUG(dbgs() << "Checking " << MI);

    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "

    for (auto &MOP : MI.operands()) {
      if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
          !TRI->regsOverlap(MOP.getReg(), RegToRename))
      if (!canRenameMOP(MOP, TRI)) {
        LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);

  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
                    TRI->sub_and_superregs_inclusive(PR),
                    [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
  std::optional<MCPhysReg> RenameReg;

  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
                                        RequiredClasses, TRI)};

  if (*MaybeCanRename) {
                                        RequiredClasses, TRI);
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineInstr &FirstMI = *I;

  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;

  std::optional<bool> MaybeCanRename;
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;

  Flags.clearRenameReg();

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  SmallVector<MachineInstr *, 4> MemInsns;

    MachineInstr &MI = *MBBI;

    if (!MI.isTransient())

    Flags.setSExtIdx(-1);
      assert(MI.mayLoadOrStore() && "Expected memory operation.");

      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        int MemSize = TII->getMemScale(MI);
          if (MIOffset % MemSize) {
          MIOffset /= MemSize;
          MIOffset *= MemSize;

      if (BaseReg == MIBaseReg) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
          bool IsBaseRegModified = !ModifiedRegUnits.available(
          bool IsMIRegTheSame =
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {

        if (FindNarrowMerge) {
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
                              << "keep looking.\n");
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");

      if (!ModifiedRegUnits.available(BaseReg))

      const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(

      bool RtNotModified =
      bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&

      LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                        << (RtNotModified ? "true" : "false") << "\n"
                        << (RtNotUsed ? "true" : "false") << "\n");

      if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          std::optional<MCPhysReg> RenameReg =
                  Reg, DefinedInBB, UsedInBetween,
                  RequiredClasses, TRI);
                       << "keep looking.\n");
            Flags.setRenameReg(*RenameReg);

        Flags.setMergeForward(false);
          Flags.clearRenameReg();

      LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                        << "' not modified: "
                        << (RtNotModified ? "true" : "false") << "\n");

      if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          Flags.setMergeForward(true);
          Flags.clearRenameReg();

            MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
            RequiredClasses, TRI);
          Flags.setMergeForward(true);
          Flags.setRenameReg(*RenameReg);
          MBBIWithRenameReg = MBBI;

      LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                        << "interference in between, keep looking.\n");

  if (Flags.getRenameReg())
    return MBBIWithRenameReg;
    if (!ModifiedRegUnits.available(BaseReg)) {
    if (MI.mayLoadOrStore())

  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      MI.getOperand(0).getReg() != AArch64::SP)

  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");

    if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
          return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
      return std::nullopt;

  MachineBasicBlock *MBB = InsertPt->getParent();

  if (NextI == Update)

  int Value = Update->getOperand(2).getImm();
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)

  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
              .add(Update->getOperand(0))
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
              .add(Update->getOperand(0))

  I->eraseFromParent();
  Update->eraseFromParent();
                                            unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;

  MachineInstrBuilder AddMIB, MemMIB;
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
  ++NumConstOffsetFolded;

  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    if (!MI.getOperand(2).isImm())

    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

      int Scale, MinOffset, MaxOffset;
      if (UpdateOffset % Scale != 0)

      int ScaledOffset = UpdateOffset / Scale;
      if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

    MachineInstr &MovzMI = *MBBI;
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      return Offset >> 24 == 0;
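// A MOVZ/MOVK pair builds the constant index here: the MOVZ is assumed to
// supply the low 16 bits and the MOVK seen above supplies the bits selected
// by its shift amount, so the combined offset is accepted only when it fits
// in 24 bits (Offset >> 24 == 0).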
  MachineInstr &MemMI = *I;
      TII->getMemScale(MemMI);

    if (MIUnscaledOffset != UnscaledOffset)

    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  const bool BaseRegSP = BaseReg == AArch64::SP;

  MachineBasicBlock *CurMBB = I->getParent();

      MachineInstr &MI = *MBBI;

      if (!MI.isTransient())

      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))

      if (!ModifiedRegUnits.available(BaseReg) ||
          (BaseRegSP && MBBI->mayLoadOrStore()))

    if (!VisitSucc || Limit <= Count)

    MachineBasicBlock *SuccToVisit = nullptr;
    unsigned LiveSuccCount = 0;
    for (MachineBasicBlock *Succ : CurMBB->successors()) {
      for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
        if (Succ->isLiveIn(*AI)) {
          if (LiveSuccCount++)
      if (Succ->pred_size() == 1)
    CurMBB = SuccToVisit;
  MachineInstr &MemMI = *I;
  MachineFunction &MF = *MemMI.getMF();

                               : AArch64::NoRegister};

    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))

  const bool BaseRegSP = BaseReg == AArch64::SP;

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  bool MemAccessBeforeSPPreInc = false;

    MachineInstr &MI = *MBBI;

    if (!MI.isTransient())

    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)

    if (!ModifiedRegUnits.available(BaseReg) ||

    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
      MergeEither = false;

    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
  MachineInstr &MemMI = *I;

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

    MachineInstr &MI = *MBBI;

    if (!MI.isTransient())

    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {

    if (!ModifiedRegUnits.available(IndexReg) ||
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
  MachineInstr &MI = *MBBI;

  if (MI.hasOrderedMemoryRef())

  ++NumLoadsFromStoresPromoted;

  MBBI = promoteLoadFromStore(MBBI, StoreI);
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
  MachineInstr &MI = *MBBI;

  if (!TII->isCandidateToMergeOrPair(MI))

  LdStPairFlags Flags;
    ++NumZeroStoresPromoted;

    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;

  if (!TII->isCandidateToMergeOrPair(MI))

  if (MI.mayLoad() && Subtarget->hasDisableLdp())

  if (MI.mayStore() && Subtarget->hasDisableStp())

  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;

  LdStPairFlags Flags;
  auto Prev = std::prev(MBBI);

  MachineMemOperand *MemOp =
      MI.memoperands_empty() ? nullptr : MI.memoperands().front();

  if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
      (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
    if (!MemOp || !MemOp->getMemoryType().isValid()) {
      NumFailedAlignmentCheck++;

    uint64_t MemAlignment = MemOp->getAlign().value();
    uint64_t TypeAlignment =
        Align(MemOp->getSize().getValue().getKnownMinValue()).value();

    if (MemAlignment < 2 * TypeAlignment) {
      NumFailedAlignmentCheck++;

    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    MBBI = mergePairedInsns(MBBI, Paired, Flags);

    for (auto I = std::next(Prev); I != MBBI; I++)
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;

    if (auto NextI = mergeUpdateInsn(MBBI, Update, false,

  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))

  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
    if (auto NextI = mergeUpdateInsn(MBBI, Update, true,
                                     true, MergeEither)) {

  int UnscaledOffset =

  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
    if (auto NextI = mergeUpdateInsn(MBBI, Update, false,
  MachineInstr &MI = *MBBI;

  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))

  if (Update != E && (Offset & (Scale - 1)) == 0) {
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  if (EnableNarrowZeroStOpt)

  DefinedInBB.clear();

    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();

  for (auto &MBB : Fn) {
bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
  AArch64LoadStoreOpt Impl;
  Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  return Impl.runOnMachineFunction(MF);

  return new AArch64LoadStoreOptLegacy();

  AArch64LoadStoreOpt Impl;
  bool Changed = Impl.runOnMachineFunction(MF);