72#define DEBUG_TYPE "arm-ldst-opt"
74STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
75STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
76STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
77STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
78STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
101struct ARMLoadStoreOpt {
112 bool RegClassInfoValid;
113 bool isThumb1, isThumb2;
120 struct MemOpQueueEntry {
132 struct MergeCandidate {
137 unsigned LatestMIIdx;
140 unsigned EarliestMIIdx;
147 bool CanMergeToLSMulti;
150 bool CanMergeToLSDouble;
161 unsigned Base,
unsigned WordOffset,
168 ArrayRef<std::pair<unsigned, bool>> Regs,
175 ArrayRef<std::pair<unsigned, bool>> Regs,
177 void FormCandidates(
const MemOpQueue &MemOps);
178 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
203char ARMLoadStoreOptLegacy::ID = 0;
211 for (
const auto &MO :
MI.operands()) {
214 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
224 unsigned Opcode =
MI.getOpcode();
225 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
226 unsigned NumOperands =
MI.getDesc().getNumOperands();
227 unsigned OffField =
MI.getOperand(NumOperands - 3).getImm();
229 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
230 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
231 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
232 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
236 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
237 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
252 return MI.getOperand(1);
256 return MI.getOperand(0);
353 case ARM::tLDMIA_UPD:
354 case ARM::tSTMIA_UPD:
355 case ARM::t2LDMIA_RET:
357 case ARM::t2LDMIA_UPD:
359 case ARM::t2STMIA_UPD:
361 case ARM::VLDMSIA_UPD:
363 case ARM::VSTMSIA_UPD:
365 case ARM::VLDMDIA_UPD:
367 case ARM::VSTMDIA_UPD:
381 case ARM::t2LDMDB_UPD:
383 case ARM::t2STMDB_UPD:
384 case ARM::VLDMSDB_UPD:
385 case ARM::VSTMSDB_UPD:
386 case ARM::VLDMDDB_UPD:
387 case ARM::VSTMDDB_UPD:
399 return Opc == ARM::tLDRi ||
Opc == ARM::tLDRspi;
403 return Opc == ARM::t2LDRi12 ||
Opc == ARM::t2LDRi8;
411 return Opc == ARM::tSTRi ||
Opc == ARM::tSTRspi;
415 return Opc == ARM::t2STRi12 ||
Opc == ARM::t2STRi8;
444 switch (
MI->getOpcode()) {
471 case ARM::tLDMIA_UPD:
472 case ARM::tSTMIA_UPD:
479 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 4;
482 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 8;
494 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
498 bool InsertSub =
false;
499 unsigned Opc =
MBBI->getOpcode();
501 if (
MBBI->readsRegister(
Base,
nullptr)) {
504 Opc == ARM::tLDRi ||
Opc == ARM::tLDRHi ||
Opc == ARM::tLDRBi;
506 Opc == ARM::tSTRi ||
Opc == ARM::tSTRHi ||
Opc == ARM::tSTRBi;
508 if (IsLoad || IsStore) {
514 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
521 if (
Offset >= 0 && !(IsStore && InstrSrcReg ==
Base))
525 }
else if ((
Opc == ARM::tSUBi8 ||
Opc == ARM::tADDi8) &&
526 !definesCPSR(*
MBBI)) {
531 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
533 MO.
getImm() + WordOffset * 4 :
534 MO.
getImm() - WordOffset * 4 ;
548 }
else if (definesCPSR(*
MBBI) ||
MBBI->isCall() ||
MBBI->isBranch()) {
566 if (
MBBI->killsRegister(
Base,
nullptr) ||
567 MBBI->definesRegister(
Base,
nullptr))
589unsigned ARMLoadStoreOpt::findFreeReg(
const TargetRegisterClass &RegClass) {
590 if (!RegClassInfoValid) {
592 RegClassInfoValid =
true;
595 for (
unsigned Reg : RegClassInfo.
getOrder(&RegClass))
604void ARMLoadStoreOpt::moveLiveRegsBefore(
const MachineBasicBlock &
MBB,
607 if (!LiveRegsValid) {
611 LiveRegsValid =
true;
614 while (LiveRegPos != Before) {
622 for (
const std::pair<unsigned, bool> &R : Regs)
631MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
633 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
635 ArrayRef<std::pair<unsigned, bool>> Regs,
637 unsigned NumRegs = Regs.size();
642 bool SafeToClobberCPSR = !isThumb1 ||
646 bool Writeback = isThumb1;
652 assert(
Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
653 if (Opcode == ARM::tLDRi)
655 else if (Opcode == ARM::tSTRi)
662 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
664 if (
Offset == 4 && haveIBAndDA) {
666 }
else if (
Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
668 }
else if (
Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
671 }
else if (
Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
684 if (!SafeToClobberCPSR)
691 NewBase = Regs[NumRegs-1].first;
695 moveLiveRegsBefore(
MBB, InsertBefore);
699 for (
const std::pair<unsigned, bool> &R : Regs)
702 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
707 int BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2ADDspImm
711 : (isThumb1 &&
Offset < 8)
713 : isThumb1 ?
ARM::tADDi8 :
ARM::ADDri;
719 BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2SUBspImm
723 : isThumb1 ?
ARM::tSUBi8 :
ARM::SUBri;
732 bool KillOldBase = BaseKill &&
741 if (
Base != NewBase &&
742 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
760 if (BaseOpc == ARM::tADDrSPi) {
761 assert(
Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
799 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
802 MachineInstrBuilder MIB;
805 assert(isThumb1 &&
"expected Writeback only inThumb1");
806 if (Opcode == ARM::tLDMIA) {
809 Opcode = ARM::tLDMIA_UPD;
821 UpdateBaseRegUses(
MBB, InsertBefore,
DL,
Base, NumRegs, Pred, PredReg);
830 for (
const std::pair<unsigned, bool> &R : Regs)
838MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
840 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
842 ArrayRef<std::pair<unsigned, bool>> Regs,
845 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
846 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
849 MachineInstrBuilder MIB =
BuildMI(
MBB, InsertBefore,
DL,
850 TII->get(LoadStoreOpcode));
852 MIB.
addReg(Regs[0].first, RegState::Define)
853 .
addReg(Regs[1].first, RegState::Define);
864MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
865 const MachineInstr *
First = Cand.Instrs.front();
866 unsigned Opcode =
First->getOpcode();
869 SmallVector<unsigned, 4> ImpDefs;
870 DenseSet<unsigned> KilledRegs;
871 DenseSet<unsigned> UsedRegs;
873 for (
const MachineInstr *
MI : Cand.Instrs) {
876 bool IsKill = MO.
isKill();
886 for (
const MachineOperand &MO :
MI->implicit_operands()) {
895 if (
MI->readsRegister(DefReg,
nullptr))
905 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
914 MachineInstr *Merged =
nullptr;
915 if (Cand.CanMergeToLSDouble)
916 Merged = CreateLoadStoreDouble(
MBB, InsertBefore,
Offset,
Base, BaseKill,
917 Opcode, Pred, PredReg,
DL, Regs,
919 if (!Merged && Cand.CanMergeToLSMulti)
920 Merged = CreateLoadStoreMulti(
MBB, InsertBefore,
Offset,
Base, BaseKill,
921 Opcode, Pred, PredReg,
DL, Regs, Cand.Instrs);
927 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
928 bool EarliestAtBegin =
false;
930 EarliestAtBegin =
true;
932 EarliestI = std::prev(EarliestI);
936 for (MachineInstr *
MI : Cand.Instrs)
943 EarliestI = std::next(EarliestI);
949 for (MachineInstr &
MI : FixupRange) {
950 for (
unsigned &ImpDefReg : ImpDefs) {
951 for (MachineOperand &MO :
MI.implicit_operands()) {
963 for (
unsigned ImpDef : ImpDefs)
964 MIB.
addReg(ImpDef, RegState::ImplicitDefine);
968 for (MachineInstr &
MI : FixupRange) {
969 for (MachineOperand &MO :
MI.uses()) {
995 unsigned Opcode =
MI.getOpcode();
1008void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
1009 const MachineInstr *FirstMI = MemOps[0].MI;
1014 unsigned SIndex = 0;
1015 unsigned EIndex = MemOps.size();
1018 const MachineInstr *
MI = MemOps[SIndex].MI;
1019 int Offset = MemOps[SIndex].Offset;
1022 unsigned PRegNum = PMO.
isUndef() ? std::numeric_limits<unsigned>::max()
1023 :
TRI->getEncodingValue(PReg);
1024 unsigned Latest = SIndex;
1025 unsigned Earliest = SIndex;
1027 bool CanMergeToLSDouble =
1033 CanMergeToLSDouble =
false;
1035 bool CanMergeToLSMulti =
true;
1038 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1039 CanMergeToLSMulti =
false;
1043 if (PReg == ARM::SP || PReg == ARM::PC)
1044 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1048 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1063 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++
Count) {
1064 int NewOffset = MemOps[
I].Offset;
1069 if (
Reg == ARM::SP ||
Reg == ARM::PC)
1075 unsigned RegNum = MO.
isUndef() ? std::numeric_limits<unsigned>::max()
1076 :
TRI->getEncodingValue(
Reg);
1077 bool PartOfLSMulti = CanMergeToLSMulti;
1078 if (PartOfLSMulti) {
1080 if (RegNum <= PRegNum)
1081 PartOfLSMulti =
false;
1085 else if (!isNotVFP && RegNum != PRegNum+1)
1086 PartOfLSMulti =
false;
1089 bool PartOfLSDouble = CanMergeToLSDouble &&
Count <= 1;
1091 if (!PartOfLSMulti && !PartOfLSDouble)
1093 CanMergeToLSMulti &= PartOfLSMulti;
1094 CanMergeToLSDouble &= PartOfLSDouble;
1097 unsigned Position = MemOps[
I].Position;
1098 if (Position < MemOps[Latest].Position)
1100 else if (Position > MemOps[Earliest].Position)
1108 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1109 for (
unsigned C = SIndex, CE = SIndex +
Count;
C <
CE; ++
C)
1110 Candidate->Instrs.push_back(MemOps[
C].
MI);
1111 Candidate->LatestMIIdx = Latest - SIndex;
1112 Candidate->EarliestMIIdx = Earliest - SIndex;
1113 Candidate->InsertPos = MemOps[Latest].Position;
1115 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1116 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1117 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1118 Candidates.push_back(Candidate);
1121 }
while (SIndex < EIndex);
1198 switch (
MI.getOpcode()) {
1199 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1200 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1202 case ARM::t2SUBspImm:
1203 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1205 case ARM::t2ADDspImm:
1206 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1207 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1208 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1213 if (
MI.getOperand(0).getReg() !=
Reg ||
1214 MI.getOperand(1).getReg() !=
Reg ||
1216 MIPredReg != PredReg)
1219 if (CheckCPSRDef && definesCPSR(
MI))
1221 return MI.getOperand(2).getImm() * Scale;
1232 if (
MBBI == BeginMBBI)
1237 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1241 return Offset == 0 ? EndMBBI : PrevMBBI;
1253 while (NextMBBI != EndMBBI) {
1255 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1257 if (NextMBBI == EndMBBI)
1271 if (
Reg == ARM::SP || NextMBBI->readsRegister(
Reg,
TRI) ||
1272 NextMBBI->definesRegister(
Reg,
TRI))
1292bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *
MI) {
1294 if (isThumb1)
return false;
1297 const MachineOperand &BaseOP =
MI->getOperand(0);
1299 bool BaseKill = BaseOP.
isKill();
1302 unsigned Opcode =
MI->getOpcode();
1312 MachineBasicBlock &
MBB = *
MI->getParent();
1334 bool HighRegsUsed =
false;
1336 if (MO.
getReg() >= ARM::R8) {
1337 HighRegsUsed =
true;
1347 if (MergeInstr !=
MBB.
end()) {
1374 return ARM::LDR_PRE_IMM;
1376 return ARM::STR_PRE_IMM;
1387 return ARM::t2LDR_PRE;
1390 return ARM::t2STR_PRE;
1399 return ARM::LDR_POST_IMM;
1401 return ARM::STR_POST_IMM;
1412 return ARM::t2LDR_POST;
1414 case ARM::t2LDRBi12:
1415 return ARM::t2LDRB_POST;
1416 case ARM::t2LDRSBi8:
1417 case ARM::t2LDRSBi12:
1418 return ARM::t2LDRSB_POST;
1420 case ARM::t2LDRHi12:
1421 return ARM::t2LDRH_POST;
1422 case ARM::t2LDRSHi8:
1423 case ARM::t2LDRSHi12:
1424 return ARM::t2LDRSH_POST;
1427 return ARM::t2STR_POST;
1429 case ARM::t2STRBi12:
1430 return ARM::t2STRB_POST;
1432 case ARM::t2STRHi12:
1433 return ARM::t2STRH_POST;
1435 case ARM::MVE_VLDRBS16:
1436 return ARM::MVE_VLDRBS16_post;
1437 case ARM::MVE_VLDRBS32:
1438 return ARM::MVE_VLDRBS32_post;
1439 case ARM::MVE_VLDRBU16:
1440 return ARM::MVE_VLDRBU16_post;
1441 case ARM::MVE_VLDRBU32:
1442 return ARM::MVE_VLDRBU32_post;
1443 case ARM::MVE_VLDRHS32:
1444 return ARM::MVE_VLDRHS32_post;
1445 case ARM::MVE_VLDRHU32:
1446 return ARM::MVE_VLDRHU32_post;
1447 case ARM::MVE_VLDRBU8:
1448 return ARM::MVE_VLDRBU8_post;
1449 case ARM::MVE_VLDRHU16:
1450 return ARM::MVE_VLDRHU16_post;
1451 case ARM::MVE_VLDRWU32:
1452 return ARM::MVE_VLDRWU32_post;
1453 case ARM::MVE_VSTRB16:
1454 return ARM::MVE_VSTRB16_post;
1455 case ARM::MVE_VSTRB32:
1456 return ARM::MVE_VSTRB32_post;
1457 case ARM::MVE_VSTRH32:
1458 return ARM::MVE_VSTRH32_post;
1459 case ARM::MVE_VSTRBU8:
1460 return ARM::MVE_VSTRBU8_post;
1461 case ARM::MVE_VSTRHU16:
1462 return ARM::MVE_VSTRHU16_post;
1463 case ARM::MVE_VSTRWU32:
1464 return ARM::MVE_VSTRWU32_post;
1472bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *
MI) {
1475 if (isThumb1)
return false;
1480 unsigned Opcode =
MI->getOpcode();
1482 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1483 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1484 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1486 if (
MI->getOperand(2).getImm() != 0)
1493 if (
MI->getOperand(0).getReg() ==
Base)
1499 MachineBasicBlock &
MBB = *
MI->getParent();
1505 if (!isAM5 &&
Offset == Bytes) {
1507 }
else if (
Offset == -Bytes) {
1511 if (MergeInstr ==
MBB.
end())
1515 if ((isAM5 &&
Offset != Bytes) ||
1533 MachineOperand &MO =
MI->getOperand(0);
1547 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1584 MachineOperand &MO =
MI->getOperand(0);
1588 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1617bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &
MI)
const {
1618 unsigned Opcode =
MI.getOpcode();
1619 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1620 "Must have t2STRDi8 or t2LDRDi8");
1621 if (
MI.getOperand(3).getImm() != 0)
1627 const MachineOperand &BaseOp =
MI.getOperand(2);
1629 const MachineOperand &Reg0Op =
MI.getOperand(0);
1630 const MachineOperand &Reg1Op =
MI.getOperand(1);
1637 MachineBasicBlock &
MBB = *
MI.getParent();
1643 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1646 if (MergeInstr ==
MBB.
end())
1648 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1657 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1660 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1665 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1666 TII->get(NewOpc).getNumOperands() == 7 &&
1667 "Unexpected number of operands in Opcode specification.");
1670 for (
const MachineOperand &MO :
MI.implicit_operands())
1682 unsigned Opcode =
MI.getOpcode();
1702 if (!
MI.getOperand(1).isReg())
1707 if (!
MI.hasOneMemOperand())
1726 if (
MI.getOperand(0).isReg() &&
MI.getOperand(0).isUndef())
1730 if (
MI.getOperand(1).isUndef())
1738 bool isDef,
unsigned NewOpc,
unsigned Reg,
1739 bool RegDeadKill,
bool RegUndef,
unsigned BaseReg,
1764bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &
MBB,
1766 MachineInstr *
MI = &*
MBBI;
1767 unsigned Opcode =
MI->getOpcode();
1770 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1773 const MachineOperand &BaseOp =
MI->getOperand(2);
1775 Register EvenReg =
MI->getOperand(0).getReg();
1776 Register OddReg =
MI->getOperand(1).getReg();
1777 unsigned EvenRegNum =
TRI->getDwarfRegNum(EvenReg,
false);
1778 unsigned OddRegNum =
TRI->getDwarfRegNum(OddReg,
false);
1782 bool Errata602117 = EvenReg ==
BaseReg &&
1783 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->
isCortexM3();
1785 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1786 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1788 if (!Errata602117 && !NonConsecutiveRegs)
1791 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1792 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1793 bool EvenDeadKill = isLd ?
1794 MI->getOperand(0).isDead() :
MI->getOperand(0).isKill();
1795 bool EvenUndef =
MI->getOperand(0).isUndef();
1796 bool OddDeadKill = isLd ?
1797 MI->getOperand(1).isDead() :
MI->getOperand(1).isKill();
1798 bool OddUndef =
MI->getOperand(1).isUndef();
1799 bool BaseKill = BaseOp.
isKill();
1800 bool BaseUndef = BaseOp.
isUndef();
1801 assert((isT2 ||
MI->getOperand(3).getReg() == ARM::NoRegister) &&
1802 "register offset not handled below");
1807 if (OddRegNum > EvenRegNum && OffImm == 0) {
1810 unsigned NewOpc = (isLd)
1811 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1812 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1834 unsigned NewOpc = (isLd)
1835 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1836 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1839 unsigned NewOpc2 = (isLd)
1840 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1841 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1844 if (isLd &&
TRI->regsOverlap(EvenReg, BaseReg)) {
1845 assert(!
TRI->regsOverlap(OddReg, BaseReg));
1847 false, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
MI);
1849 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1852 if (OddReg == EvenReg && EvenDeadKill) {
1856 EvenDeadKill =
false;
1860 if (EvenReg == BaseReg)
1861 EvenDeadKill =
false;
1863 EvenUndef, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
1866 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1881bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &
MBB) {
1883 unsigned CurrBase = 0;
1884 unsigned CurrOpc = ~0
u;
1886 unsigned Position = 0;
1887 assert(Candidates.size() == 0);
1889 LiveRegsValid =
false;
1894 MBBI = std::prev(
I);
1895 if (FixInvalidRegPairOp(
MBB,
MBBI))
1900 unsigned Opcode =
MBBI->getOpcode();
1901 const MachineOperand &MO =
MBBI->getOperand(0);
1907 if (CurrBase == 0) {
1912 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1916 if (CurrOpc == Opcode && CurrBase ==
Base && CurrPred == Pred) {
1924 bool Overlap =
false;
1928 for (
const MemOpQueueEntry &
E : MemOps) {
1929 if (
TRI->regsOverlap(
Reg,
E.MI->getOperand(0).getReg())) {
1939 if (
Offset > MemOps.back().Offset) {
1940 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1943 MemOpQueue::iterator
MI, ME;
1944 for (
MI = MemOps.begin(), ME = MemOps.end();
MI != ME; ++
MI) {
1955 if (
MI != MemOps.end()) {
1956 MemOps.insert(
MI, MemOpQueueEntry(*
MBBI,
Offset, Position));
1967 }
else if (
MBBI->isDebugInstr()) {
1969 }
else if (
MBBI->getOpcode() == ARM::t2LDRDi8 ||
1970 MBBI->getOpcode() == ARM::t2STRDi8) {
1977 if (MemOps.size() > 0) {
1978 FormCandidates(MemOps);
1986 if (MemOps.size() > 0)
1987 FormCandidates(MemOps);
1991 auto LessThan = [](
const MergeCandidate*
M0,
const MergeCandidate *
M1) {
1992 return M0->InsertPos <
M1->InsertPos;
1998 for (
const MergeCandidate *Candidate : Candidates) {
1999 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2000 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2005 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2006 MergeBaseUpdateLSDouble(*Merged);
2008 MergeBaseUpdateLSMultiple(Merged);
2010 for (MachineInstr *
MI : Candidate->Instrs) {
2011 if (MergeBaseUpdateLoadStore(
MI))
2016 assert(Candidate->Instrs.size() == 1);
2017 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2023 for (MachineInstr *
MI : MergeBaseCandidates)
2024 MergeBaseUpdateLSDouble(*
MI);
2025 MergeBaseCandidates.clear();
2040bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &
MBB) {
2042 if (isThumb1)
return false;
2047 (
MBBI->getOpcode() == ARM::BX_RET ||
2048 MBBI->getOpcode() == ARM::tBX_RET ||
2049 MBBI->getOpcode() == ARM::MOVPCLR)) {
2052 while (PrevI->isDebugInstr() && PrevI !=
MBB.
begin())
2054 MachineInstr &PrevMI = *PrevI;
2056 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2057 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2058 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2060 if (MO.
getReg() != ARM::LR)
2062 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2063 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2064 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
2075bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &
MBB) {
2078 MBBI->getOpcode() != ARM::tBX_RET)
2083 if (Prev->getOpcode() != ARM::tMOVr ||
2084 !Prev->definesRegister(ARM::LR,
nullptr))
2087 for (
auto Use : Prev->uses())
2089 assert(STI->hasV4TOps());
2102bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2106 AFI = Fn.
getInfo<ARMFunctionInfo>();
2110 RegClassInfoValid =
false;
2114 bool Modified =
false, ModifiedLDMReturn =
false;
2115 for (MachineBasicBlock &
MBB : Fn) {
2118 ModifiedLDMReturn |= MergeReturnIntoLDM(
MBB);
2128 if (ModifiedLDMReturn)
2135bool ARMLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
2138 ARMLoadStoreOpt Impl;
2139 return Impl.runOnMachineFunction(MF);
2142#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2143 "ARM pre- register allocation load / store optimization pass"
2149struct ARMPreAllocLoadStoreOpt {
2172 bool DistributeIncrements();
2183 StringRef getPassName()
const override {
2195char ARMPreAllocLoadStoreOptLegacy::ID = 0;
2208 cl::init(8),
cl::Hidden);
2210bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn,
2218 TD = &Fn.getDataLayout();
2220 TII = STI->getInstrInfo();
2221 TRI = STI->getRegisterInfo();
2222 MRI = &Fn.getRegInfo();
2225 bool Modified = DistributeIncrements();
2227 Modified |= RescheduleLoadStoreInstrs(&MFI);
2232bool ARMPreAllocLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &Fn) {
2236 ARMPreAllocLoadStoreOpt Impl;
2237 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2238 MachineDominatorTree *DT =
2239 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2240 return Impl.runOnMachineFunction(Fn, AA, DT);
2253 if (
I->isDebugInstr() || MemOps.
count(&*
I))
2255 if (
I->isCall() ||
I->isTerminator() ||
I->hasUnmodeledSideEffects())
2257 if (
I->mayStore() || (!isLd &&
I->mayLoad()))
2259 if (
I->mayAlias(
AA, *
MemOp,
false))
2261 for (
unsigned j = 0,
NumOps =
I->getNumOperands(); j !=
NumOps; ++j) {
2274 if (MemRegs.
size() <= 4)
2277 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2280bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2281 MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc,
2285 if (!STI->hasV5TEOps())
2291 if (Opcode == ARM::LDRi12) {
2293 }
else if (Opcode == ARM::STRi12) {
2295 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2296 NewOpc = ARM::t2LDRDi8;
2299 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2300 NewOpc = ARM::t2STRDi8;
2317 if (Alignment < ReqAlign)
2323 int Limit = (1 << 8) * Scale;
2324 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2333 int Limit = (1 << 8) * Scale;
2334 if (OffImm >= Limit || (OffImm & (Scale-1)))
2340 if (FirstReg == SecondReg)
2348bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2349 MachineBasicBlock *
MBB, SmallVectorImpl<MachineInstr *> &
Ops,
unsigned Base,
2350 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2352 bool RetVal =
false;
2359 return LOffset > ROffset;
2366 while (
Ops.size() > 1) {
2367 unsigned FirstLoc = ~0
U;
2368 unsigned LastLoc = 0;
2369 MachineInstr *FirstOp =
nullptr;
2370 MachineInstr *LastOp =
nullptr;
2372 unsigned LastOpcode = 0;
2373 unsigned LastBytes = 0;
2374 unsigned NumMove = 0;
2379 if (LastOpcode && LSMOpcode != LastOpcode)
2386 if (Bytes != LastBytes ||
Offset != (LastOffset + (
int)Bytes))
2398 LastOpcode = LSMOpcode;
2400 unsigned Loc = MI2LocMap[
Op];
2401 if (Loc <= FirstLoc) {
2405 if (Loc >= LastLoc) {
2414 SmallPtrSet<MachineInstr*, 4> MemOps;
2415 SmallSet<unsigned, 4> MemRegs;
2416 for (
size_t i =
Ops.size() - NumMove, e =
Ops.size(); i != e; ++i) {
2423 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2426 MemOps, MemRegs,
TRI, AA);
2428 for (
unsigned i = 0; i != NumMove; ++i)
2433 while (InsertPos !=
MBB->
end() &&
2434 (MemOps.
count(&*InsertPos) || InsertPos->isDebugInstr()))
2439 MachineInstr *Op0 =
Ops.back();
2440 MachineInstr *Op1 =
Ops[
Ops.size()-2];
2445 unsigned NewOpc = 0;
2448 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2449 FirstReg, SecondReg, BaseReg,
2450 Offset, PredReg, Pred, isT2)) {
2454 const MCInstrDesc &MCID =
TII->get(NewOpc);
2455 const TargetRegisterClass *TRC =
TII->getRegClass(MCID, 0);
2461 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2462 .
addReg(FirstReg, RegState::Define)
2463 .
addReg(SecondReg, RegState::Define)
2475 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2498 for (
unsigned i = 0; i != NumMove; ++i) {
2499 MachineInstr *
Op =
Ops.pop_back_val();
2510 NumLdStMoved += NumMove;
2521 if (
MI->isNonListDebugValue()) {
2522 auto &
Op =
MI->getOperand(0);
2526 for (
unsigned I = 2;
I <
MI->getNumOperands();
I++) {
2527 auto &
Op =
MI->getOperand(
I);
2541 auto RegIt = RegisterMap.find(
Op.getReg());
2542 if (RegIt == RegisterMap.end())
2544 auto &InstrVec = RegIt->getSecond();
2551 MI->getDebugLoc()->getInlinedAt());
2556ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *
MBB) {
2557 bool RetVal =
false;
2559 DenseMap<MachineInstr *, unsigned> MI2LocMap;
2560 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2561 using BaseVec = SmallVector<unsigned, 4>;
2562 Base2InstMap Base2LdsMap;
2563 Base2InstMap Base2StsMap;
2569 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2576 MachineInstr &
MI = *
MBBI;
2577 if (
MI.isCall() ||
MI.isTerminator()) {
2583 if (!
MI.isDebugInstr())
2584 MI2LocMap[&
MI] = ++Loc;
2592 int Opc =
MI.getOpcode();
2596 bool StopHere =
false;
2597 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2600 BI->second.push_back(&
MI);
2601 Bases.push_back(
Base);
2604 for (
const MachineInstr *
MI : BI->second) {
2611 BI->second.push_back(&
MI);
2615 FindBases(Base2LdsMap, LdBases);
2617 FindBases(Base2StsMap, StBases);
2628 for (
unsigned Base : LdBases) {
2629 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[
Base];
2631 RetVal |= RescheduleOps(
MBB, Lds,
Base,
true, MI2LocMap, RegisterMap);
2635 for (
unsigned Base : StBases) {
2636 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[
Base];
2638 RetVal |= RescheduleOps(
MBB, Sts,
Base,
false, MI2LocMap, RegisterMap);
2642 Base2LdsMap.clear();
2643 Base2StsMap.clear();
2799 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2802 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2804 MachineInstr &
MI = *
MBBI;
2806 auto PopulateRegisterAndInstrMapForDebugInstr = [&](
Register Reg) {
2807 auto RegIt = RegisterMap.
find(
Reg);
2808 if (RegIt == RegisterMap.
end())
2810 auto &InstrVec = RegIt->getSecond();
2811 InstrVec.push_back(&
MI);
2812 InstrMap[&
MI].push_back(
Reg);
2815 if (
MI.isDebugValue()) {
2817 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2825 PopulateRegisterAndInstrMapForDebugInstr(
Op.getReg());
2833 auto InstrIt = DbgValueSinkCandidates.
find(DbgVar);
2834 if (InstrIt != DbgValueSinkCandidates.
end()) {
2835 auto *
Instr = InstrIt->getSecond();
2836 auto RegIt = InstrMap.
find(Instr);
2837 if (RegIt != InstrMap.
end()) {
2838 const auto &RegVec = RegIt->getSecond();
2841 for (
auto &
Reg : RegVec) {
2842 auto RegIt = RegisterMap.
find(
Reg);
2843 if (RegIt == RegisterMap.
end())
2845 auto &InstrVec = RegIt->getSecond();
2846 auto IsDbgVar = [&](MachineInstr *
I) ->
bool {
2848 return Var == DbgVar;
2854 [&](MachineOperand &
Op) {
Op.setReg(0); });
2857 DbgValueSinkCandidates[DbgVar] = &
MI;
2861 auto Opc =
MI.getOpcode();
2864 auto Reg =
MI.getOperand(0).getReg();
2865 auto RegIt = RegisterMap.
find(
Reg);
2866 if (RegIt == RegisterMap.
end())
2868 auto &DbgInstrVec = RegIt->getSecond();
2869 if (!DbgInstrVec.size())
2871 for (
auto *DbgInstr : DbgInstrVec) {
2873 auto *ClonedMI =
MI.getMF()->CloneMachineInstr(DbgInstr);
2882 DbgValueSinkCandidates.
erase(DbgVar);
2885 [&](MachineOperand &
Op) {
Op.setReg(0); });
2888 if (DbgInstr->isDebugValueList())
2902 switch (
MI.getOpcode()) {
2903 case ARM::MVE_VLDRBS16:
2904 case ARM::MVE_VLDRBS32:
2905 case ARM::MVE_VLDRBU16:
2906 case ARM::MVE_VLDRBU32:
2907 case ARM::MVE_VLDRHS32:
2908 case ARM::MVE_VLDRHU32:
2909 case ARM::MVE_VLDRBU8:
2910 case ARM::MVE_VLDRHU16:
2911 case ARM::MVE_VLDRWU32:
2912 case ARM::MVE_VSTRB16:
2913 case ARM::MVE_VSTRB32:
2914 case ARM::MVE_VSTRH32:
2915 case ARM::MVE_VSTRBU8:
2916 case ARM::MVE_VSTRHU16:
2917 case ARM::MVE_VSTRWU32:
2919 case ARM::t2LDRHi12:
2920 case ARM::t2LDRSHi8:
2921 case ARM::t2LDRSHi12:
2923 case ARM::t2LDRBi12:
2924 case ARM::t2LDRSBi8:
2925 case ARM::t2LDRSBi12:
2927 case ARM::t2STRBi12:
2929 case ARM::t2STRHi12:
2931 case ARM::MVE_VLDRBS16_post:
2932 case ARM::MVE_VLDRBS32_post:
2933 case ARM::MVE_VLDRBU16_post:
2934 case ARM::MVE_VLDRBU32_post:
2935 case ARM::MVE_VLDRHS32_post:
2936 case ARM::MVE_VLDRHU32_post:
2937 case ARM::MVE_VLDRBU8_post:
2938 case ARM::MVE_VLDRHU16_post:
2939 case ARM::MVE_VLDRWU32_post:
2940 case ARM::MVE_VSTRB16_post:
2941 case ARM::MVE_VSTRB32_post:
2942 case ARM::MVE_VSTRH32_post:
2943 case ARM::MVE_VSTRBU8_post:
2944 case ARM::MVE_VSTRHU16_post:
2945 case ARM::MVE_VSTRWU32_post:
2946 case ARM::MVE_VLDRBS16_pre:
2947 case ARM::MVE_VLDRBS32_pre:
2948 case ARM::MVE_VLDRBU16_pre:
2949 case ARM::MVE_VLDRBU32_pre:
2950 case ARM::MVE_VLDRHS32_pre:
2951 case ARM::MVE_VLDRHU32_pre:
2952 case ARM::MVE_VLDRBU8_pre:
2953 case ARM::MVE_VLDRHU16_pre:
2954 case ARM::MVE_VLDRWU32_pre:
2955 case ARM::MVE_VSTRB16_pre:
2956 case ARM::MVE_VSTRB32_pre:
2957 case ARM::MVE_VSTRH32_pre:
2958 case ARM::MVE_VSTRBU8_pre:
2959 case ARM::MVE_VSTRHU16_pre:
2960 case ARM::MVE_VSTRWU32_pre:
2967 switch (
MI.getOpcode()) {
2968 case ARM::MVE_VLDRBS16_post:
2969 case ARM::MVE_VLDRBS32_post:
2970 case ARM::MVE_VLDRBU16_post:
2971 case ARM::MVE_VLDRBU32_post:
2972 case ARM::MVE_VLDRHS32_post:
2973 case ARM::MVE_VLDRHU32_post:
2974 case ARM::MVE_VLDRBU8_post:
2975 case ARM::MVE_VLDRHU16_post:
2976 case ARM::MVE_VLDRWU32_post:
2977 case ARM::MVE_VSTRB16_post:
2978 case ARM::MVE_VSTRB32_post:
2979 case ARM::MVE_VSTRH32_post:
2980 case ARM::MVE_VSTRBU8_post:
2981 case ARM::MVE_VSTRHU16_post:
2982 case ARM::MVE_VSTRWU32_post:
2989 switch (
MI.getOpcode()) {
2990 case ARM::MVE_VLDRBS16_pre:
2991 case ARM::MVE_VLDRBS32_pre:
2992 case ARM::MVE_VLDRBU16_pre:
2993 case ARM::MVE_VLDRBU32_pre:
2994 case ARM::MVE_VLDRHS32_pre:
2995 case ARM::MVE_VLDRHU32_pre:
2996 case ARM::MVE_VLDRBU8_pre:
2997 case ARM::MVE_VLDRHU16_pre:
2998 case ARM::MVE_VLDRWU32_pre:
2999 case ARM::MVE_VSTRB16_pre:
3000 case ARM::MVE_VSTRB32_pre:
3001 case ARM::MVE_VSTRH32_pre:
3002 case ARM::MVE_VSTRBU8_pre:
3003 case ARM::MVE_VSTRHU16_pre:
3004 case ARM::MVE_VSTRWU32_pre:
3017 int &CodesizeEstimate) {
3026 CodesizeEstimate += 1;
3027 return Imm < 0 && -Imm < ((1 << 8) * 1);
3040 MI->getOperand(BaseOp).setReg(NewBaseReg);
3048 int OldOffset =
MI->getOperand(BaseOp + 1).getImm();
3050 MI->getOperand(BaseOp + 1).setImm(OldOffset -
Offset);
3052 unsigned ConvOpcode;
3053 switch (
MI->getOpcode()) {
3054 case ARM::t2LDRHi12:
3055 ConvOpcode = ARM::t2LDRHi8;
3057 case ARM::t2LDRSHi12:
3058 ConvOpcode = ARM::t2LDRSHi8;
3060 case ARM::t2LDRBi12:
3061 ConvOpcode = ARM::t2LDRBi8;
3063 case ARM::t2LDRSBi12:
3064 ConvOpcode = ARM::t2LDRSBi8;
3066 case ARM::t2STRHi12:
3067 ConvOpcode = ARM::t2STRHi8;
3069 case ARM::t2STRBi12:
3070 ConvOpcode = ARM::t2STRBi8;
3076 "Illegal Address Immediate after convert!");
3080 .
add(
MI->getOperand(0))
3081 .
add(
MI->getOperand(1))
3083 .
add(
MI->getOperand(3))
3084 .
add(
MI->getOperand(4))
3086 MI->eraseFromParent();
3105 TRC =
TII->getRegClass(
MCID, 2);
3116 .
add(
MI->getOperand(0))
3117 .
add(
MI->getOperand(1))
3119 .
add(
MI->getOperand(3))
3120 .
add(
MI->getOperand(4))
3121 .
add(
MI->getOperand(5))
3124 if (
MI->mayLoad()) {
3126 .
add(
MI->getOperand(0))
3128 .
add(
MI->getOperand(1))
3130 .
add(
MI->getOperand(3))
3131 .
add(
MI->getOperand(4))
3136 .
add(
MI->getOperand(0))
3137 .
add(
MI->getOperand(1))
3139 .
add(
MI->getOperand(3))
3140 .
add(
MI->getOperand(4))
3164bool ARMPreAllocLoadStoreOpt::DistributeIncrements(
Register Base) {
3167 MachineInstr *BaseAccess =
nullptr;
3168 MachineInstr *PrePostInc =
nullptr;
3173 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3184 if (!
Use.getOperand(BaseOp).isReg() ||
3185 Use.getOperand(BaseOp).getReg() !=
Base)
3189 else if (
Use.getOperand(BaseOp + 1).getImm() == 0)
3192 OtherAccesses.
insert(&Use);
3195 int IncrementOffset;
3201 if (
Increment->definesRegister(ARM::CPSR,
nullptr) ||
3205 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on VirtualReg "
3206 <<
Base.virtRegIndex() <<
"\n");
3210 for (MachineInstr &Use :
3212 if (&Use == BaseAccess || (
Use.getOpcode() != TargetOpcode::PHI &&
3214 LLVM_DEBUG(
dbgs() <<
" BaseAccess doesn't dominate use of increment\n");
3224 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on postinc\n");
3228 else if (PrePostInc) {
3236 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on already "
3237 <<
"indexed VirtualReg " <<
Base.virtRegIndex() <<
"\n");
3240 BaseAccess = PrePostInc;
3254 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3255 int CodesizeEstimate = -1;
3256 for (
auto *Use : OtherAccesses) {
3258 SuccessorAccesses.
insert(Use);
3261 Use->getOperand(BaseOp + 1).getImm() -
3263 TII, CodesizeEstimate)) {
3264 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on use\n");
3267 }
else if (!DT->
dominates(Use, BaseAccess)) {
3269 dbgs() <<
" Unknown dominance relation between Base and Use\n");
3273 if (STI->
hasMinSize() && CodesizeEstimate > 0) {
3274 LLVM_DEBUG(
dbgs() <<
" Expected to grow instructions under minsize\n");
3282 NewBaseReg =
Increment->getOperand(0).getReg();
3283 MachineInstr *BaseAccessPost =
3287 (void)BaseAccessPost;
3291 for (
auto *Use : SuccessorAccesses) {
3300 Op.setIsKill(
false);
3304bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3306 SmallSetVector<Register, 4> Visited;
3307 for (
auto &
MBB : *MF) {
3308 for (
auto &
MI :
MBB) {
3310 if (BaseOp == -1 || !
MI.getOperand(BaseOp).isReg())
3314 if (!
Base.isVirtual())
3321 for (
auto Base : Visited)
3330 return new ARMPreAllocLoadStoreOptLegacy();
3331 return new ARMLoadStoreOptLegacy();
3337 ARMLoadStoreOpt Impl;
3338 bool Changed = Impl.runOnMachineFunction(MF);
3349 ARMPreAllocLoadStoreOpt Impl;
3355 bool Changed = Impl.runOnMachineFunction(MF,
AA, DT);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for an increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
static cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isThumb2Function() const
bool isThumbFunction() const
bool shouldSignReturnAddress() const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ARMBaseInstrInfo * getInstrInfo() const override
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
Align getDualLoadStoreAlignment() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
A parsed version of the target data layout string, and methods for querying it.
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
This namespace contains all of the command line option processing machinery.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
unsigned M1(unsigned Val)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr RegState getDefRegState(bool B)
FunctionPass * createARMLoadStoreOptLegacyPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.