80#define DEBUG_TYPE "aarch64-mi-peephole-opt"
94 using OpcodePair = std::pair<unsigned, unsigned>;
96 using SplitAndOpcFunc =
97 std::function<std::optional<OpcodePair>(
T,
unsigned,
T &,
T &)>;
99 std::function<void(
MachineInstr &, OpcodePair,
unsigned,
unsigned,
116 template <
typename T>
118 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
123 template <
typename T>
124 bool visitADDSUB(
unsigned PosOpc,
unsigned NegOpc,
MachineInstr &
MI);
125 template <
typename T>
126 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs,
MachineInstr &
MI);
129 enum class SplitStrategy {
133 template <
typename T>
135 SplitStrategy Strategy,
unsigned OtherOpc = 0);
147 return "AArch64 MI Peephole Optimization pass";
157char AArch64MIPeepholeOpt::ID = 0;
162 "AArch64 MI Peephole Optimization",
false,
false)
166 T UImm =
static_cast<T>(Imm);
167 assert(UImm && (UImm != ~
static_cast<T>(0)) &&
"Invalid immediate!");
198 assert(Imm && (Imm != ~
static_cast<T>(0)) &&
"Invalid immediate!");
205 unsigned LowestGapBitUnset =
209 assert(LowestGapBitUnset <
sizeof(
T) * CHAR_BIT &&
"Undefined behaviour!");
210 T NewImm1 = (
static_cast<T>(1) << LowestGapBitUnset) -
226 SplitStrategy Strategy,
238 return splitTwoPartImm<T>(
240 [
Opc, Strategy, OtherOpc](
T Imm,
unsigned RegSize,
T &Imm0,
241 T &Imm1) -> std::optional<OpcodePair> {
250 if (Insn.
size() == 1)
253 bool SplitSucc =
false;
255 case SplitStrategy::Intersect:
256 SplitSucc = splitBitmaskImm(Imm,
RegSize, Imm0, Imm1);
258 case SplitStrategy::Disjoint:
263 return std::make_pair(
Opc, !OtherOpc ?
Opc : OtherOpc);
266 [&
TII =
TII](MachineInstr &
MI, OpcodePair Opcode,
unsigned Imm0,
270 MachineBasicBlock *
MBB =
MI.getParent();
280bool AArch64MIPeepholeOpt::visitORR(MachineInstr &
MI) {
285 if (
MI.getOperand(3).getImm() != 0)
288 if (
MI.getOperand(1).getReg() != AArch64::WZR)
305 if (SrcMI->
getOpcode() == TargetOpcode::COPY &&
307 const TargetRegisterClass *RC =
312 if (RC != &AArch64::FPR32RegClass &&
313 ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
314 RC != &AArch64::ZPRRegClass) ||
321 TII->get(TargetOpcode::COPY), CpySrc)
331 else if (SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END)
339 MI.eraseFromParent();
344bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &
MI) {
346 if (
MI.getOperand(1).getReg() !=
MI.getOperand(2).getReg())
350 MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
352 MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
355 .
addReg(
MI.getOperand(0).getReg(), RegState::Define)
360 MI.eraseFromParent();
364bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &
MI) {
372 if (!
MI.isRegTiedToDefOperand(1))
376 const TargetRegisterClass *RC = MRI->
getRegClass(DstReg);
391 if ((SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
392 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
396 MachineInstr *SubregMI =
398 TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
399 .
add(
MI.getOperand(2))
400 .
add(
MI.getOperand(3));
403 MI.eraseFromParent();
412 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
413 (Imm & ~
static_cast<T>(0xffffff)) != 0)
419 if (Insn.
size() == 1)
423 Imm0 = (Imm >> 12) & 0xfff;
429bool AArch64MIPeepholeOpt::visitADDSUB(
430 unsigned PosOpc,
unsigned NegOpc, MachineInstr &
MI) {
447 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
448 MI.getOperand(1).getReg() == AArch64::WZR)
451 return splitTwoPartImm<T>(
453 [PosOpc, NegOpc](
T Imm,
unsigned RegSize,
T &Imm0,
454 T &Imm1) -> std::optional<OpcodePair> {
456 return std::make_pair(PosOpc, PosOpc);
458 return std::make_pair(NegOpc, NegOpc);
461 [&
TII =
TII](MachineInstr &
MI, OpcodePair Opcode,
unsigned Imm0,
465 MachineBasicBlock *
MBB =
MI.getParent();
478bool AArch64MIPeepholeOpt::visitADDSSUBS(
479 OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &
MI) {
483 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
484 MI.getOperand(1).getReg() == AArch64::WZR)
487 return splitTwoPartImm<T>(
490 &MRI = MRI](
T Imm,
unsigned RegSize,
T &Imm0,
491 T &Imm1) -> std::optional<OpcodePair> {
503 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
507 [&
TII =
TII](MachineInstr &
MI, OpcodePair Opcode,
unsigned Imm0,
511 MachineBasicBlock *
MBB =
MI.getParent();
525bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &
MI,
526 MachineInstr *&MovMI,
527 MachineInstr *&SubregToRegMI) {
529 MachineBasicBlock *
MBB =
MI.getParent();
531 if (L && !
L->isLoopInvariant(
MI))
540 SubregToRegMI =
nullptr;
541 if (MovMI->
getOpcode() == TargetOpcode::SUBREG_TO_REG) {
542 SubregToRegMI = MovMI;
548 if (MovMI->
getOpcode() != AArch64::MOVi32imm &&
549 MovMI->
getOpcode() != AArch64::MOVi64imm)
564bool AArch64MIPeepholeOpt::splitTwoPartImm(
566 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
569 "Invalid RegSize for legal immediate peephole optimization");
572 MachineInstr *MovMI, *SubregToRegMI;
573 if (!checkMovImmInstr(
MI, MovMI, SubregToRegMI))
585 if (
auto R = SplitAndOpc(Imm,
RegSize, Imm0, Imm1))
596 const TargetRegisterClass *FirstInstrDstRC =
597 TII->getRegClass(
TII->get(Opcode.first), 0);
598 const TargetRegisterClass *FirstInstrOperandRC =
599 TII->getRegClass(
TII->get(Opcode.first), 1);
600 const TargetRegisterClass *SecondInstrDstRC =
601 (Opcode.first == Opcode.second)
604 const TargetRegisterClass *SecondInstrOperandRC =
605 (Opcode.first == Opcode.second)
606 ? FirstInstrOperandRC
607 :
TII->getRegClass(
TII->get(Opcode.second), 1);
622 if (DstReg != NewDstReg)
626 BuildInstr(
MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
630 if (DstReg != NewDstReg) {
632 MI.getOperand(0).setReg(DstReg);
636 MI.eraseFromParent();
644bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &
MI,
unsigned Opc) {
661 if (!SrcMI || SrcMI->
getOpcode() != TargetOpcode::COPY)
668 &AArch64::FPR128RegClass) {
676 MachineInstr *INSvilaneMI =
678 .
add(
MI.getOperand(1))
679 .
add(
MI.getOperand(2))
685 MI.eraseFromParent();
695 if (!
MI->getOperand(0).isReg() || !
MI->getOperand(0).isDef())
698 if (RC != &AArch64::FPR64RegClass)
700 if (
MI->getOpcode() == TargetOpcode::COPY) {
704 if (
SrcOp.getSubReg())
707 auto IsGPR64Like = [&]() ->
bool {
709 return AArch64::GPR64allRegClass.hasSubClassEq(
711 return AArch64::GPR64allRegClass.contains(SrcReg);
715 assert(
TII &&
"Expected InstrInfo when materializing COPYs");
718 bool SrcKill = SrcMO.
isKill();
720 if (MRI->
getRegClass(SrcReg) != &AArch64::GPR64RegClass) {
725 TII->get(TargetOpcode::COPY), NewSrc)
730 }
else if (!AArch64::GPR64RegClass.
contains(SrcReg)) {
738 MI->setDesc(
TII->get(AArch64::FMOVXDr));
741 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
744bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &
MI) {
753 if (Low64MI->
getOpcode() != AArch64::INSERT_SUBREG)
773 if (!High64MI || High64MI->
getOpcode() != AArch64::INSERT_SUBREG)
776 if (High64MI && High64MI->
getOpcode() == TargetOpcode::COPY)
778 if (!High64MI || (High64MI->
getOpcode() != AArch64::MOVID &&
779 High64MI->
getOpcode() != AArch64::MOVIv2d_ns))
789 MI.eraseFromParent();
794bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &
MI) {
808 MI.eraseFromParent();
813bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &
MI) {
816 int64_t Immr =
MI.getOperand(2).getImm();
817 int64_t Imms =
MI.getOperand(3).getImm();
819 bool IsLSR = Imms == 31 && Immr <= Imms;
820 bool IsLSL = Immr == Imms + 33;
821 if (!IsLSR && !IsLSL)
828 const TargetRegisterClass *DstRC64 =
829 TII->getRegClass(
TII->get(
MI.getOpcode()), 0);
830 const TargetRegisterClass *DstRC32 =
831 TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
832 assert(DstRC32 &&
"Destination register class of UBFMXri doesn't have a "
833 "sub_32 subregister class");
835 const TargetRegisterClass *SrcRC64 =
836 TII->getRegClass(
TII->get(
MI.getOpcode()), 1);
837 const TargetRegisterClass *SrcRC32 =
838 TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
839 assert(SrcRC32 &&
"Source register class of UBFMXri doesn't have a sub_32 "
840 "subregister class");
842 Register DstReg64 =
MI.getOperand(0).getReg();
844 Register SrcReg64 =
MI.getOperand(1).getReg();
849 .
addReg(SrcReg64, {}, AArch64::sub_32);
856 TII->get(AArch64::SUBREG_TO_REG), DstReg64)
859 MI.eraseFromParent();
866bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &
MI) {
867 Register InputReg =
MI.getOperand(1).getReg();
868 if (
MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
873 SmallPtrSet<MachineInstr *, 4> DeadInstrs;
885 auto getSXTWSrcReg = [](MachineInstr *SrcMI) ->
Register {
886 if (SrcMI->
getOpcode() != AArch64::SBFMXri ||
889 return AArch64::NoRegister;
893 auto getUXTWSrcReg = [&](MachineInstr *SrcMI) ->
Register {
894 if (SrcMI->
getOpcode() != AArch64::SUBREG_TO_REG ||
897 return AArch64::NoRegister;
899 if (!Orr || Orr->
getOpcode() != AArch64::ORRWrr ||
902 return AArch64::NoRegister;
904 if (!Cpy || Cpy->
getOpcode() != AArch64::COPY ||
906 return AArch64::NoRegister;
911 Register SrcReg = getSXTWSrcReg(SrcMI);
913 SrcReg = getUXTWSrcReg(SrcMI);
919 MI.getOperand(1).setReg(SrcReg);
921 for (
auto *DeadMI : DeadInstrs) {
923 DeadMI->eraseFromParent();
928bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
932 TII =
static_cast<const AArch64InstrInfo *
>(MF.
getSubtarget().getInstrInfo());
933 TRI =
static_cast<const AArch64RegisterInfo *
>(
935 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
938 assert(MRI->
isSSA() &&
"Expected to be run on SSA form!");
942 for (MachineBasicBlock &
MBB : MF) {
944 switch (
MI.getOpcode()) {
947 case AArch64::INSERT_SUBREG:
950 case AArch64::ANDWrr:
951 Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri,
MI,
952 SplitStrategy::Intersect);
954 case AArch64::ANDXrr:
955 Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri,
MI,
956 SplitStrategy::Intersect);
958 case AArch64::ANDSWrr:
959 Changed |= trySplitLogicalImm<uint32_t>(
960 AArch64::ANDWri,
MI, SplitStrategy::Intersect, AArch64::ANDSWri);
962 case AArch64::ANDSXrr:
963 Changed |= trySplitLogicalImm<uint64_t>(
964 AArch64::ANDXri,
MI, SplitStrategy::Intersect, AArch64::ANDSXri);
966 case AArch64::EORWrr:
967 Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri,
MI,
968 SplitStrategy::Disjoint);
970 case AArch64::EORXrr:
971 Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri,
MI,
972 SplitStrategy::Disjoint);
974 case AArch64::ORRWrr:
975 Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri,
MI,
976 SplitStrategy::Disjoint);
978 case AArch64::ORRXrr:
979 Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri,
MI,
980 SplitStrategy::Disjoint);
982 case AArch64::ORRWrs:
985 case AArch64::ADDWrr:
986 Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri,
MI);
988 case AArch64::SUBWrr:
989 Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri,
MI);
991 case AArch64::ADDXrr:
992 Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri,
MI);
994 case AArch64::SUBXrr:
995 Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri,
MI);
997 case AArch64::ADDSWrr:
999 visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
1000 {AArch64::SUBWri, AArch64::SUBSWri},
MI);
1002 case AArch64::SUBSWrr:
1004 visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
1005 {AArch64::ADDWri, AArch64::ADDSWri},
MI);
1007 case AArch64::ADDSXrr:
1009 visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
1010 {AArch64::SUBXri, AArch64::SUBSXri},
MI);
1012 case AArch64::SUBSXrr:
1014 visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
1015 {AArch64::ADDXri, AArch64::ADDSXri},
MI);
1017 case AArch64::CSELWr:
1018 case AArch64::CSELXr:
1021 case AArch64::INSvi64gpr:
1022 Changed |= visitINSviGPR(
MI, AArch64::INSvi64lane);
1024 case AArch64::INSvi32gpr:
1025 Changed |= visitINSviGPR(
MI, AArch64::INSvi32lane);
1027 case AArch64::INSvi16gpr:
1028 Changed |= visitINSviGPR(
MI, AArch64::INSvi16lane);
1030 case AArch64::INSvi8gpr:
1031 Changed |= visitINSviGPR(
MI, AArch64::INSvi8lane);
1033 case AArch64::INSvi64lane:
1036 case AArch64::FMOVDr:
1039 case AArch64::UBFMXri:
1053 return new AArch64MIPeepholeOpt();
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI, const AArch64InstrInfo *TII)
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc)
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64MIPeepholeOptPass()
constexpr RegState getKillRegState(bool B)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.