44#define DEBUG_TYPE "legalizer"
57static std::pair<int, int>
63 unsigned NumParts =
Size / NarrowSize;
64 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
67 if (LeftoverSize == 0)
72 if (LeftoverSize % EltSize != 0)
81 return std::make_pair(NumParts, NumLeftover);
89 switch (Ty.getSizeInBits()) {
130 auto Step = LI.getAction(
MI, MRI);
131 switch (Step.Action) {
146 return bitcast(
MI, Step.TypeIdx, Step.NewType);
149 return lower(
MI, Step.TypeIdx, Step.NewType);
158 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
166void LegalizerHelper::insertParts(
Register DstReg,
188 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
190 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
191 return mergeMixedSubvectors(DstReg, AllRegs);
197 extractGCDType(GCDRegs, GCDTy, PartReg);
198 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
199 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
204 LLT Ty = MRI.getType(
Reg);
212void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
215 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
216 appendVectorElts(AllElts, PartRegs[i]);
219 if (!MRI.getType(Leftover).isVector())
222 appendVectorElts(AllElts, Leftover);
224 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
230 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
232 const int StartIdx = Regs.
size();
233 const int NumResults =
MI.getNumOperands() - 1;
235 for (
int I = 0;
I != NumResults; ++
I)
236 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
241 LLT SrcTy = MRI.getType(SrcReg);
242 if (SrcTy == GCDTy) {
248 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
255 LLT SrcTy = MRI.getType(SrcReg);
257 extractGCDType(Parts, GCDTy, SrcReg);
261LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
263 unsigned PadStrategy) {
268 int NumOrigSrc = VRegs.
size();
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
280 assert(PadStrategy == TargetOpcode::G_SEXT);
285 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
301 for (
int I = 0;
I != NumParts; ++
I) {
302 bool AllMergePartsArePadding =
true;
305 for (
int J = 0; J != NumSubParts; ++J) {
306 int Idx =
I * NumSubParts + J;
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
312 SubMerge[J] = VRegs[Idx];
315 AllMergePartsArePadding =
false;
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
334 Remerge[
I] = AllPadReg;
338 if (NumSubParts == 1)
339 Remerge[
I] = SubMerge[0];
341 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[
I];
348 VRegs = std::move(Remerge);
352void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
354 LLT DstTy = MRI.getType(DstReg);
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
364 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
373 UnmergeDefs[0] = DstReg;
374 for (
unsigned I = 1;
I != NumDefs; ++
I)
375 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
378 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
386#define RTLIBCASE_INT(LibcallPrefix) \
390 return RTLIB::LibcallPrefix##32; \
392 return RTLIB::LibcallPrefix##64; \
394 return RTLIB::LibcallPrefix##128; \
396 llvm_unreachable("unexpected size"); \
400#define RTLIBCASE(LibcallPrefix) \
404 return RTLIB::LibcallPrefix##32; \
406 return RTLIB::LibcallPrefix##64; \
408 return RTLIB::LibcallPrefix##80; \
410 return RTLIB::LibcallPrefix##128; \
412 llvm_unreachable("unexpected size"); \
417 case TargetOpcode::G_LROUND:
419 case TargetOpcode::G_LLROUND:
421 case TargetOpcode::G_MUL:
423 case TargetOpcode::G_SDIV:
425 case TargetOpcode::G_UDIV:
427 case TargetOpcode::G_SREM:
429 case TargetOpcode::G_UREM:
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
433 case TargetOpcode::G_FADD:
435 case TargetOpcode::G_FSUB:
437 case TargetOpcode::G_FMUL:
439 case TargetOpcode::G_FDIV:
441 case TargetOpcode::G_FEXP:
443 case TargetOpcode::G_FEXP2:
445 case TargetOpcode::G_FEXP10:
447 case TargetOpcode::G_FREM:
449 case TargetOpcode::G_FPOW:
451 case TargetOpcode::G_FPOWI:
453 case TargetOpcode::G_FMA:
455 case TargetOpcode::G_FSIN:
457 case TargetOpcode::G_FCOS:
459 case TargetOpcode::G_FTAN:
461 case TargetOpcode::G_FASIN:
463 case TargetOpcode::G_FACOS:
465 case TargetOpcode::G_FATAN:
467 case TargetOpcode::G_FATAN2:
469 case TargetOpcode::G_FSINH:
471 case TargetOpcode::G_FCOSH:
473 case TargetOpcode::G_FTANH:
475 case TargetOpcode::G_FSINCOS:
477 case TargetOpcode::G_FMODF:
479 case TargetOpcode::G_FLOG10:
481 case TargetOpcode::G_FLOG:
483 case TargetOpcode::G_FLOG2:
485 case TargetOpcode::G_FLDEXP:
487 case TargetOpcode::G_FCEIL:
489 case TargetOpcode::G_FFLOOR:
491 case TargetOpcode::G_FMINNUM:
493 case TargetOpcode::G_FMAXNUM:
495 case TargetOpcode::G_FMINIMUMNUM:
497 case TargetOpcode::G_FMAXIMUMNUM:
499 case TargetOpcode::G_FSQRT:
501 case TargetOpcode::G_FRINT:
503 case TargetOpcode::G_FNEARBYINT:
505 case TargetOpcode::G_INTRINSIC_TRUNC:
507 case TargetOpcode::G_INTRINSIC_ROUND:
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
511 case TargetOpcode::G_INTRINSIC_LRINT:
513 case TargetOpcode::G_INTRINSIC_LLRINT:
533 AttributeList CallerAttrs =
F.getAttributes();
534 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Attribute::NoAlias)
536 .removeAttribute(Attribute::NonNull)
541 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Attribute::SExt))
553 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
560 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
568 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
571 if (Ret->getNumImplicitOperands() != 1)
574 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
591 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
596 Info.OrigRet = Result;
599 (Result.Ty->isVoidTy() ||
600 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
608 if (
MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
619 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
623 Next->eraseFromParent();
624 }
while (
MI->getNextNode());
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
658 Args.push_back({MO.getReg(), OpType, 0});
677 unsigned AddrSpace =
DL.getAllocaAddrSpace();
695 if (LibcallResult != LegalizeResult::Legalized)
703 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
704 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
705 MI.eraseFromParent();
720 LLT DstTy = MRI.getType(DstFrac);
725 unsigned AddrSpace =
DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
735 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
738 if (LibcallResult != LegalizeResult::Legalized)
744 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
745 MI.eraseFromParent();
756 case TargetOpcode::G_FPEXT:
758 case TargetOpcode::G_FPTRUNC:
760 case TargetOpcode::G_FPTOSI:
762 case TargetOpcode::G_FPTOUI:
764 case TargetOpcode::G_SITOFP:
766 case TargetOpcode::G_UITOFP:
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
778 Arg.
Flags[0].setSExt();
780 Arg.
Flags[0].setZExt();
791 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
795 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy =
nullptr;
805 Args.push_back({Reg, OpTy, 0});
808 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc =
MI.getOpcode();
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
837 if (RTLibcallImpl == RTLIB::Unsupported) {
844 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
851 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
869 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
873 Next->eraseFromParent();
874 }
while (
MI.getNextNode());
884 unsigned Opc =
MI.getOpcode();
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
891 return RTLIB::UNKNOWN_LIBCALL;
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Ordering, MemSize);
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Ordering, MemSize);
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Ordering, MemSize);
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Ordering, MemSize);
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Ordering, MemSize);
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Ordering, MemSize);
924 return RTLIB::UNKNOWN_LIBCALL;
937 unsigned Opc =
MI.getOpcode();
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
947 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
949 NewLLT) =
MI.getFirst5RegLLTs();
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
968 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
972 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
987 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
992 if (RTLibcallImpl == RTLIB::Unsupported) {
999 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1013static RTLIB::Libcall
1015 RTLIB::Libcall RTLibcall;
1016 switch (
MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1061 LLT StateTy = MRI.getType(Dst);
1064 MachinePointerInfo TempPtrInfo;
1068 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1073 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1081 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1099 LLT StateTy = MRI.getType(Src);
1102 MachinePointerInfo TempPtrInfo;
1111 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1116 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1117 LocObserver,
nullptr);
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1135 llvm_unreachable("unexpected size"); \
1166 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1169 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1176 LLT DstTy = MRI.getType(DstReg);
1177 const auto Cond =
Cmp->getCond();
1182 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1187 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1191 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1198 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1206 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1219 const auto [OeqLibcall, OeqPred] =
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1223 const auto [UnoLibcall, UnoPred] =
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1240 const auto [OeqLibcall, OeqPred] =
1245 const auto [UnoLibcall, UnoPred] =
1250 if (NotOeq && NotUno)
1269 const auto [InversedLibcall, InversedPred] =
1271 if (!BuildLibcall(InversedLibcall,
1296 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1298 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1301 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1307 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1312 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1314 switch (
MI.getOpcode()) {
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1370 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1383 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1393 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1406 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1408 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1414 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1417 MI.eraseFromParent();
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1426 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1428 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1433 {
MI.getOperand(1).getReg(), HLTy, 0},
1434 {
MI.getOperand(2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1437 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1446 if (!FromTy || !ToTy)
1453 case TargetOpcode::G_FCMP: {
1457 MI.eraseFromParent();
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1465 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1469 FromTy, LocObserver);
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1481 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1509 MI.eraseFromParent();
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1535 MI.eraseFromParent();
1542 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1545 switch (
MI.getOpcode()) {
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1550 LLT DstTy = MRI.getType(DstReg);
1558 if (SizeOp0 % NarrowSize != 0) {
1563 MI.eraseFromParent();
1567 int NumParts = SizeOp0 / NarrowSize;
1570 for (
int i = 0; i < NumParts; ++i)
1574 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1576 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1577 MI.eraseFromParent();
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1582 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1588 for (
int I = 0;
I != NumParts; ++
I) {
1589 unsigned Offset =
I * NarrowSize;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1598 if (LeftoverBits != 0) {
1602 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1606 insertParts(
MI.getOperand(0).getReg(),
1607 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1609 MI.eraseFromParent();
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1616 case TargetOpcode::G_TRUNC: {
1620 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1622 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1626 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1627 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1628 MI.eraseFromParent();
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1636 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1641 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1643 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1645 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1649 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1650 MI.eraseFromParent();
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1667 case TargetOpcode::G_EXTRACT:
1669 case TargetOpcode::G_INSERT:
1671 case TargetOpcode::G_LOAD: {
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(DstReg);
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1680 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1682 LoadMI.eraseFromParent();
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1699 if (MemSize == NarrowSize) {
1701 }
else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 }
else if (MemSize > NarrowSize) {
1715 LoadMI.eraseFromParent();
1718 case TargetOpcode::G_STORE: {
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(SrcReg);
1723 if (SrcTy.isVector())
1726 int NumParts = SizeOp0 / NarrowSize;
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1735 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1747 auto &MMO = StoreMI.getMMO();
1749 if (MemSize > NarrowSize) {
1753 auto TmpReg =
MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1754 if (MemSize == NarrowSize) {
1756 }
else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1761 StoreMI.eraseFromParent();
1764 case TargetOpcode::G_SELECT:
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1791 switch (
MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1798 case TargetOpcode::G_CTPOP:
1800 case TargetOpcode::G_CTLS:
1810 case TargetOpcode::G_INTTOPTR:
1818 case TargetOpcode::G_PTRTOINT:
1826 case TargetOpcode::G_PHI: {
1829 if (SizeOp0 % NarrowSize != 0)
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1836 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1844 for (
unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1847 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1848 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1849 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1852 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1854 MI.eraseFromParent();
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1862 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1868 case TargetOpcode::G_ICMP: {
1870 LLT SrcTy = MRI.getType(LHS);
1876 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1882 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1883 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1889 LLT ResTy = MRI.getType(Dst);
1894 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1896 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1897 auto LHS = std::get<0>(LHSAndRHS);
1898 auto RHS = std::get<1>(LHSAndRHS);
1899 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1906 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(LHSAndRHS);
1908 auto RHS = std::get<1>(LHSAndRHS);
1909 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1910 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1911 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1912 TargetOpcode::G_ZEXT);
1919 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1920 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1921 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1926 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1930 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1945 LHSPartRegs[
I], RHSPartRegs[
I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1952 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1961 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1965 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1966 RHSLeftoverRegs[
I]);
1968 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1969 RHSLeftoverRegs[
I]);
1972 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1973 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1979 MI.eraseFromParent();
1982 case TargetOpcode::G_FCMP:
1991 case TargetOpcode::G_SEXT_INREG: {
1995 int64_t SizeInBits =
MI.getOperand(2).getImm();
2004 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
2005 MO1.
setReg(TruncMIB.getReg(0));
2008 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2020 if (SizeOp0 % NarrowSize != 0)
2022 int NumParts = SizeOp0 / NarrowSize;
2030 for (
int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2046 for (
int i = 0; i < NumParts; ++i) {
2049 PartialExtensionReg = DstRegs.
back();
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2058 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2060 FullExtensionReg = DstRegs.
back();
2065 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2068 PartialExtensionReg = DstRegs.
back();
2074 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2075 MI.eraseFromParent();
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2089 for (
unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2091 {SrcRegs[NumParts - 1 - i]});
2095 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2098 MI.eraseFromParent();
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2115 case TargetOpcode::G_FPEXT:
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2125 case TargetOpcode::G_VSCALE: {
2127 LLT Ty = MRI.getType(Dst);
2131 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2132 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2133 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2136 MI.eraseFromParent();
2143 LLT Ty = MRI.getType(Val);
2149 if (Ty.isPointer()) {
2150 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2152 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2158 if (Ty.isPointerVector())
2159 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2160 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2164 unsigned OpIdx,
unsigned ExtOpcode) {
2166 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2167 MO.
setReg(ExtB.getReg(0));
2173 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2174 MO.
setReg(ExtB.getReg(0));
2178 unsigned OpIdx,
unsigned TruncOpcode) {
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2182 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2187 unsigned OpIdx,
unsigned ExtOpcode) {
2189 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2191 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2208 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2218 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2225LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2231 if (DstTy.isVector())
2236 const int SrcSize = SrcTy.getSizeInBits();
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2240 unsigned NumOps =
MI.getNumOperands();
2241 unsigned NumSrc =
MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2244 if (WideSize >= DstSize) {
2248 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2249 const unsigned Offset = (
I - 1) * PartSize;
2262 ResultReg = NextResult;
2265 if (WideSize > DstSize)
2267 else if (DstTy.isPointer())
2270 MI.eraseFromParent();
2295 const int GCD = std::gcd(SrcSize, WideSize);
2305 if (GCD == SrcSize) {
2308 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2309 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2315 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2317 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2321 const int PartsPerGCD = WideSize / GCD;
2325 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2327 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2334 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2336 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2337 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2340 MI.eraseFromParent();
2345LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2350 int NumDst =
MI.getNumOperands() - 1;
2351 Register SrcReg =
MI.getOperand(NumDst).getReg();
2352 LLT SrcTy = MRI.getType(SrcReg);
2356 Register Dst0Reg =
MI.getOperand(0).getReg();
2357 LLT DstTy = MRI.getType(Dst0Reg);
2366 dbgs() <<
"Not casting non-integral address space integer\n");
2371 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2379 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2387 for (
int I = 1;
I != NumDst; ++
I) {
2388 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2389 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2393 MI.eraseFromParent();
2404 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2408 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2411 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2434 if (PartsPerRemerge == 1) {
2437 for (
int I = 0;
I != NumUnmerge; ++
I) {
2438 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2440 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx =
I * PartsPerUnmerge + J;
2443 MIB.addDef(
MI.getOperand(Idx).getReg());
2446 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2450 MIB.addUse(Unmerge.getReg(
I));
2453 SmallVector<Register, 16> Parts;
2454 for (
int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2458 for (
int I = 0;
I != NumDst; ++
I) {
2459 for (
int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx =
I * PartsPerRemerge + J;
2464 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2465 RemergeParts.
clear();
2469 MI.eraseFromParent();
2474LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2477 unsigned Offset =
MI.getOperand(2).getImm();
2480 if (SrcTy.
isVector() || DstTy.isVector())
2492 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2496 if (DstTy.isPointer())
2503 MI.eraseFromParent();
2508 LLT ShiftTy = SrcTy;
2517 MI.eraseFromParent();
2548LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2550 if (TypeIdx != 0 || WideTy.
isVector())
2560LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2564 std::optional<Register> CarryIn;
2565 switch (
MI.getOpcode()) {
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn =
MI.getOperand(4).getReg();
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn =
MI.getOperand(4).getReg();
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn =
MI.getOperand(4).getReg();
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn =
MI.getOperand(4).getReg();
2618 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2619 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2623 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2625 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626 {LHSExt, RHSExt, *CarryIn})
2629 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2631 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2632 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2633 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2638 MI.eraseFromParent();
2643LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2645 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2662 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2669 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2673 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2674 {ShiftL, ShiftR},
MI.getFlags());
2679 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2682 MI.eraseFromParent();
2687LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2696 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2698 LLT SrcTy = MRI.getType(
LHS);
2699 LLT OverflowTy = MRI.getType(OriginalOverflow);
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2708 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2715 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2720 {LeftOperand, RightOperand});
2722 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2727 MachineInstrBuilder ExtResult;
2734 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2738 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2741 if (WideMulCanOverflow) {
2749 MI.eraseFromParent();
2755 unsigned Opcode =
MI.getOpcode();
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(
MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(
MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(
MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON:
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2852 ExtOpc = TargetOpcode::G_ZEXT;
2855 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2856 LLT CurTy = MRI.getType(SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2865 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2883 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2890 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2891 Opcode == TargetOpcode::G_CTLZ
2896 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2897 MI.eraseFromParent();
2900 case TargetOpcode::G_BSWAP: {
2904 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2909 MI.getOperand(0).setReg(DstExt);
2913 LLT Ty = MRI.getType(DstReg);
2915 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2916 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2922 case TargetOpcode::G_BITREVERSE: {
2926 LLT Ty = MRI.getType(DstReg);
2929 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2931 MI.getOperand(0).setReg(DstExt);
2934 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2935 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2948 case TargetOpcode::G_ABS:
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2987 case TargetOpcode::G_SHL:
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3025 case TargetOpcode::G_SDIVREM:
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3063 case TargetOpcode::G_UDIVREM:
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3076 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3091 case TargetOpcode::G_SELECT:
3101 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3108 case TargetOpcode::G_FPEXT:
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3130 case TargetOpcode::G_SITOFP:
3140 case TargetOpcode::G_UITOFP:
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3156 LLT Ty = MRI.getType(OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3159 MI.getOperand(0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3174 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3175 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3183 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3200 case TargetOpcode::G_STORE: {
3204 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3206 if (!Ty.isScalar()) {
3214 MI.setMemRefs(MF, {NewMMO});
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3228 case TargetOpcode::G_FPTRUNCSTORE:
3235 case TargetOpcode::G_CONSTANT: {
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 MRI.getType(
MI.getOperand(0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3248 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3254 case TargetOpcode::G_FCONSTANT: {
3260 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3262 MI.eraseFromParent();
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3271 case TargetOpcode::G_BRCOND:
3277 case TargetOpcode::G_FCMP:
3288 case TargetOpcode::G_ICMP:
3293 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3297 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3321 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3336 LLT VecTy = MRI.getType(VecReg);
3340 TargetOpcode::G_ANYEXT);
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3370 LLT VecTy = MRI.getType(VecReg);
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3435 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3441 case TargetOpcode::G_FMODF: {
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3476 case TargetOpcode::G_FFREXP: {
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3501 case TargetOpcode::G_INTTOPTR:
3509 case TargetOpcode::G_PTRTOINT:
3517 case TargetOpcode::G_BUILD_VECTOR: {
3521 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3527 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3535 case TargetOpcode::G_SEXT_INREG:
3544 case TargetOpcode::G_PTRMASK: {
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3571 LLT VecTy = MRI.getType(VecReg);
3578 case TargetOpcode::G_VSCALE: {
3585 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3607 LLT SubVecTy = MRI.getType(SubVec);
3611 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3612 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3613 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3617 auto SplatZero =
MIRBuilder.buildSplatVector(
3622 MI.eraseFromParent();
3631 auto Unmerge =
B.buildUnmerge(Ty, Src);
3632 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3641 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3655 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3664 MI.eraseFromParent();
3675 MI.eraseFromParent();
3682 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3692 LLT DstCastTy = DstEltTy;
3693 LLT SrcPartTy = SrcEltTy;
3697 if (NumSrcElt < NumDstElt) {
3708 SrcPartTy = SrcEltTy;
3709 }
else if (NumSrcElt > NumDstElt) {
3721 DstCastTy = DstEltTy;
3726 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3730 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3731 MI.eraseFromParent();
3735 if (DstTy.isVector()) {
3738 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3739 MI.eraseFromParent();
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
3757 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3758 LLT IdxTy =
B.getMRI()->getType(Idx);
3761 auto OffsetMask =
B.buildConstant(
3763 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3764 return B.buildShl(IdxTy, OffsetIdx,
3765 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3791 if (NewNumElts > OldNumElts) {
3802 if (NewNumElts % OldNumElts != 0)
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3810 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3813 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3815 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3816 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3817 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3818 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3819 NewOps[
I] = Elt.getReg(0);
3822 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3824 MI.eraseFromParent();
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3851 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3852 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3855 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3859 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3860 ScaledIdx).getReg(0);
3868 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3870 MI.eraseFromParent();
3884 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3885 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3886 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3887 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3890 auto EltMask =
B.buildConstant(
3894 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3895 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3898 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3902 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3939 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3940 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3943 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3947 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3948 ScaledIdx).getReg(0);
3958 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3959 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3963 MI.eraseFromParent();
3993 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3997 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
4003 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4005 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4014 MI.eraseFromParent();
4032 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4033 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4043 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4044 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4046 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4047 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4049 MI.eraseFromParent();
4079 LLT DstTy = MRI.getType(Dst);
4080 LLT SrcTy = MRI.getType(Src);
4086 if (DstTy == CastTy)
4094 if (CastEltSize < DstEltSize)
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4104 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4105 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4108 ES->eraseFromParent();
4143 LLT DstTy = MRI.getType(Dst);
4144 LLT BigVecTy = MRI.getType(BigVec);
4145 LLT SubVecTy = MRI.getType(SubVec);
4147 if (DstTy == CastTy)
4162 if (CastEltSize < DstEltSize)
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4173 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4174 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4176 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4179 ES->eraseFromParent();
4187 LLT DstTy = MRI.getType(DstReg);
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4214 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4218 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4219 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4221 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4224 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4226 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4229 if (DstTy != LoadTy)
4237 if (
MIRBuilder.getDataLayout().isBigEndian())
4255 uint64_t LargeSplitSize, SmallSplitSize;
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4267 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4281 if (Alignment.
value() * 8 > MemSizeInBits &&
4286 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4303 LLT PtrTy = MRI.getType(PtrReg);
4316 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4319 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4321 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4323 SmallPtr, *SmallMMO);
4325 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4326 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4331 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4335 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4355 LLT SrcTy = MRI.getType(SrcReg);
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4371 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4375 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4379 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4394 uint64_t LargeSplitSize, SmallSplitSize;
4401 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4413 if (SrcTy.isPointer()) {
4415 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4418 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4421 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4422 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4425 LLT PtrTy = MRI.getType(PtrReg);
4427 LargeSplitSize / 8);
4428 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4434 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4435 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4444 LLT SrcTy = MRI.getType(SrcReg);
4450 assert(SrcTy.isVector() &&
"Expect a vector store type");
4457 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4461 auto Elt =
MIRBuilder.buildExtractVectorElement(
4462 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4463 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4464 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4470 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4471 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4475 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4486 switch (
MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4505 case TargetOpcode::G_STORE: {
4521 case TargetOpcode::G_SELECT: {
4525 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4527 dbgs() <<
"bitcast action not implemented for vector select\n");
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4552 case TargetOpcode::G_CONCAT_VECTORS:
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4566void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4576 switch(
MI.getOpcode()) {
4579 case TargetOpcode::G_FCONSTANT:
4581 case TargetOpcode::G_BITCAST:
4583 case TargetOpcode::G_SREM:
4584 case TargetOpcode::G_UREM: {
4585 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4587 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4588 {MI.getOperand(1), MI.getOperand(2)});
4590 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4592 MI.eraseFromParent();
4595 case TargetOpcode::G_SADDO:
4596 case TargetOpcode::G_SSUBO:
4598 case TargetOpcode::G_SADDE:
4600 case TargetOpcode::G_SSUBE:
4602 case TargetOpcode::G_UMULH:
4603 case TargetOpcode::G_SMULH:
4605 case TargetOpcode::G_SMULO:
4606 case TargetOpcode::G_UMULO: {
4609 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4610 LLT Ty = MRI.getType(Res);
4612 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4613 ? TargetOpcode::G_SMULH
4614 : TargetOpcode::G_UMULH;
4618 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4619 MI.removeOperand(1);
4622 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4630 if (Opcode == TargetOpcode::G_SMULH) {
4631 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4632 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4639 case TargetOpcode::G_FNEG: {
4640 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4643 Register CastedSubByReg = SubByReg;
4645 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4646 !SubByRegTy.getScalarType().isInteger()) {
4647 auto BitcastDst = SubByRegTy.changeElementType(
4649 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4655 if (ResTy != TyInt) {
4657 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4660 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4662 MI.eraseFromParent();
4665 case TargetOpcode::G_FSUB:
4666 case TargetOpcode::G_STRICT_FSUB: {
4667 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4668 LLT Ty = MRI.getType(Res);
4673 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4674 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4678 MI.eraseFromParent();
4681 case TargetOpcode::G_FMAD:
4683 case TargetOpcode::G_FFLOOR:
4685 case TargetOpcode::G_LROUND:
4686 case TargetOpcode::G_LLROUND: {
4689 LLT SrcTy = MRI.getType(SrcReg);
4690 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4693 MI.eraseFromParent();
4696 case TargetOpcode::G_INTRINSIC_ROUND:
4698 case TargetOpcode::G_FRINT: {
4701 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4704 case TargetOpcode::G_INTRINSIC_LRINT:
4705 case TargetOpcode::G_INTRINSIC_LLRINT: {
4708 LLT SrcTy = MRI.getType(SrcReg);
4710 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4712 MI.eraseFromParent();
4715 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4716 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4717 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4718 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4719 **
MI.memoperands_begin());
4721 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4722 MI.eraseFromParent();
4725 case TargetOpcode::G_LOAD:
4726 case TargetOpcode::G_SEXTLOAD:
4727 case TargetOpcode::G_ZEXTLOAD:
4729 case TargetOpcode::G_STORE:
4731 case TargetOpcode::G_CTLZ_ZERO_POISON:
4732 case TargetOpcode::G_CTTZ_ZERO_POISON:
4733 case TargetOpcode::G_CTLZ:
4734 case TargetOpcode::G_CTTZ:
4735 case TargetOpcode::G_CTPOP:
4736 case TargetOpcode::G_CTLS:
4739 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4741 Register NewRes = MRI.cloneVirtualRegister(Res);
4748 MI.eraseFromParent();
4752 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4753 const LLT CondTy = MRI.getType(CarryOut);
4754 const LLT Ty = MRI.getType(Res);
4756 Register NewRes = MRI.cloneVirtualRegister(Res);
4759 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4765 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4766 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4773 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4778 MI.eraseFromParent();
4782 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4787 MI.eraseFromParent();
4791 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4792 const LLT CondTy = MRI.getType(BorrowOut);
4793 const LLT Ty = MRI.getType(Res);
4796 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4802 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4803 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4810 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4811 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4813 MI.eraseFromParent();
4853 case G_MERGE_VALUES:
4855 case G_UNMERGE_VALUES:
4857 case TargetOpcode::G_SEXT_INREG: {
4858 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4859 int64_t SizeInBits =
MI.getOperand(2).getImm();
4861 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4862 LLT DstTy = MRI.getType(DstReg);
4863 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4866 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4867 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4868 MI.eraseFromParent();
4871 case G_EXTRACT_VECTOR_ELT:
4872 case G_INSERT_VECTOR_ELT:
4874 case G_SHUFFLE_VECTOR:
4876 case G_VECTOR_COMPRESS:
4878 case G_DYN_STACKALLOC:
4882 case G_STACKRESTORE:
4892 case G_READ_REGISTER:
4893 case G_WRITE_REGISTER:
4900 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4901 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4907 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4912 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4923 bool IsSigned =
MI.getOpcode() == G_ABDS;
4924 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4925 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4926 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4949 case G_MEMCPY_INLINE:
4961 case G_ATOMICRMW_SUB: {
4962 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4963 const LLT ValTy = MRI.getType(Val);
4967 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4968 MI.eraseFromParent();
4994 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4998 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5004 Align StackTypeAlign =
5011 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5012 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5017 LLT IdxTy =
B.getMRI()->getType(IdxReg);
5029 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
5032 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5043 "Converting bits to bytes lost precision");
5049 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5050 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5052 if (IdxTy != MRI.getType(Index))
5053 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5058 LLT PtrTy = MRI.getType(VecPtr);
5059 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5067 std::initializer_list<unsigned> NonVecOpIndices) {
5068 if (
MI.getNumMemOperands() != 0)
5085 if (!Ty.isVector()) {
5091 if (Ty.getNumElements() != NumElts)
5106 assert(Ty.isVector() &&
"Expected vector type");
5108 int NumParts, NumLeftover;
5109 std::tie(NumParts, NumLeftover) =
5112 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5113 for (
int i = 0; i < NumParts; ++i) {
5118 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5127 for (
unsigned i = 0; i <
N; ++i) {
5129 Ops.push_back(
Op.getReg());
5130 else if (
Op.isImm())
5131 Ops.push_back(
Op.getImm());
5132 else if (
Op.isPredicate())
5154 std::initializer_list<unsigned> NonVecOpIndices) {
5156 "Non-compatible opcode or not specified non-vector operands");
5157 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5159 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5160 unsigned NumDefs =
MI.getNumDefs();
5168 for (
unsigned i = 0; i < NumDefs; ++i) {
5169 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5177 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5178 ++UseIdx, ++UseNo) {
5181 MI.getOperand(UseIdx));
5190 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5194 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5196 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5197 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5200 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5201 Uses.push_back(InputOpsPieces[InputNo][i]);
5204 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5205 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5210 for (
unsigned i = 0; i < NumDefs; ++i)
5211 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5213 for (
unsigned i = 0; i < NumDefs; ++i)
5214 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5217 MI.eraseFromParent();
5224 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5226 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5227 unsigned NumDefs =
MI.getNumDefs();
5231 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5236 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5237 UseIdx += 2, ++UseNo) {
5245 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5247 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5248 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5250 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5253 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5254 Phi.addUse(InputOpsPieces[j][i]);
5255 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5265 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5267 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5270 MI.eraseFromParent();
5278 const int NumDst =
MI.getNumOperands() - 1;
5279 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5280 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5281 LLT SrcTy = MRI.getType(SrcReg);
5283 if (TypeIdx != 1 || NarrowTy == DstTy)
5290 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5293 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5307 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5308 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5309 const int PartsPerUnmerge = NumDst / NumUnmerge;
5311 for (
int I = 0;
I != NumUnmerge; ++
I) {
5312 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5314 for (
int J = 0; J != PartsPerUnmerge; ++J)
5315 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5316 MIB.addUse(Unmerge.getReg(
I));
5319 MI.eraseFromParent();
5326 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5330 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5332 if (NarrowTy == SrcTy)
5340 assert(SrcTy.isVector() &&
"Expected vector types");
5342 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5356 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5357 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5358 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5364 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5365 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5366 ++i,
Offset += NumNarrowTyElts) {
5369 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5372 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5373 MI.eraseFromParent();
5377 assert(TypeIdx == 0 &&
"Bad type index");
5378 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5393 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5394 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5396 for (
unsigned i = 0; i < NumParts; ++i) {
5398 for (
unsigned j = 0; j < NumElts; ++j)
5399 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5401 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5404 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5405 MI.eraseFromParent();
5413 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5415 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5417 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5419 InsertVal =
MI.getOperand(2).getReg();
5421 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5422 LLT VecTy = MRI.getType(SrcVec);
5428 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5432 MI.eraseFromParent();
5441 SplitPieces[IdxVal] = InsertVal;
5442 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5444 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5448 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5451 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5452 TargetOpcode::G_ANYEXT);
5456 LLT IdxTy = MRI.getType(Idx);
5457 int64_t PartIdx = IdxVal / NewNumElts;
5459 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5462 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5465 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5466 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5467 VecParts[PartIdx] = InsertPart.getReg(0);
5471 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5473 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5477 MI.eraseFromParent();
5497 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5509 LLT ValTy = MRI.getType(ValReg);
5518 int NumLeftover = -1;
5524 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5526 NumParts = NarrowRegs.
size();
5527 NumLeftover = NarrowLeftoverRegs.
size();
5534 LLT PtrTy = MRI.getType(AddrReg);
5544 auto MMO = LdStMI.
getMMO();
5546 unsigned NumParts,
unsigned Offset) ->
unsigned {
5549 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5551 unsigned ByteOffset =
Offset / 8;
5554 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5561 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5562 ValRegs.push_back(Dst);
5563 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5565 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5574 unsigned HandledOffset =
5575 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5579 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5582 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5583 LeftoverTy, NarrowLeftoverRegs);
5597 switch (
MI.getOpcode()) {
5598 case G_IMPLICIT_DEF:
5614 case G_FCANONICALIZE:
5631 case G_INTRINSIC_LRINT:
5632 case G_INTRINSIC_LLRINT:
5633 case G_INTRINSIC_ROUND:
5634 case G_INTRINSIC_ROUNDEVEN:
5637 case G_INTRINSIC_TRUNC:
5665 case G_FMINNUM_IEEE:
5666 case G_FMAXNUM_IEEE:
5688 case G_CTLZ_ZERO_POISON:
5690 case G_CTTZ_ZERO_POISON:
5707 case G_ADDRSPACE_CAST:
5720 case G_STRICT_FLDEXP:
5722 case G_TRUNC_SSAT_S:
5723 case G_TRUNC_SSAT_U:
5724 case G_TRUNC_USAT_U:
5732 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5737 case G_UNMERGE_VALUES:
5739 case G_BUILD_VECTOR:
5740 assert(TypeIdx == 0 &&
"not a vector type index");
5742 case G_CONCAT_VECTORS:
5746 case G_EXTRACT_VECTOR_ELT:
5747 case G_INSERT_VECTOR_ELT:
5756 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5757 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5759 case G_SHUFFLE_VECTOR:
5765 case G_INTRINSIC_FPTRUNC_ROUND:
5775 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5776 "Not a bitcast operation");
5781 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5783 unsigned NewElemCount =
5786 if (NewElemCount == 1) {
5789 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5796 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5805 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5806 MI.eraseFromParent();
5812 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5816 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5817 MI.getFirst3RegLLTs();
5820 if (DstTy != Src1Ty)
5822 if (DstTy != Src2Ty)
5837 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5853 unsigned InputUsed[2] = {-1U, -1U};
5854 unsigned FirstMaskIdx =
High * NewElts;
5855 bool UseBuildVector =
false;
5856 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5858 int Idx = Mask[FirstMaskIdx + MaskOffset];
5863 if (
Input >= std::size(Inputs)) {
5870 Idx -=
Input * NewElts;
5874 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5875 if (InputUsed[OpNo] ==
Input) {
5878 }
else if (InputUsed[OpNo] == -1U) {
5880 InputUsed[OpNo] =
Input;
5885 if (OpNo >= std::size(InputUsed)) {
5888 UseBuildVector =
true;
5893 Ops.push_back(Idx + OpNo * NewElts);
5896 if (UseBuildVector) {
5901 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5903 int Idx = Mask[FirstMaskIdx + MaskOffset];
5908 if (
Input >= std::size(Inputs)) {
5915 Idx -=
Input * NewElts;
5919 .buildExtractVectorElement(
5920 EltTy, Inputs[
Input],
5926 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5927 }
else if (InputUsed[0] == -1U) {
5929 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5930 }
else if (NewElts == 1) {
5931 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5933 Register Op0 = Inputs[InputUsed[0]];
5937 : Inputs[InputUsed[1]];
5939 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5946 MI.eraseFromParent();
5959 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5965 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5968 const unsigned NumParts =
5970 : SrcTy.getNumElements();
5974 if (DstTy != NarrowTy)
5980 unsigned NumPartsLeft = NumParts;
5981 while (NumPartsLeft > 1) {
5982 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5985 .buildInstr(ScalarOpc, {NarrowTy},
5986 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5989 SplitSrcs = PartialResults;
5990 PartialResults.
clear();
5991 NumPartsLeft = SplitSrcs.
size();
5995 MI.eraseFromParent();
6000 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
6001 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6004 MI.eraseFromParent();
6008 for (
unsigned Part = 0; Part < NumParts; ++Part) {
6010 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6018 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6021 Register Acc = PartialReductions[0];
6022 for (
unsigned Part = 1; Part < NumParts; ++Part) {
6023 if (Part == NumParts - 1) {
6025 {Acc, PartialReductions[Part]});
6028 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6032 MI.eraseFromParent();
6038 unsigned int TypeIdx,
6040 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6041 MI.getFirst3RegLLTs();
6042 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6046 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6047 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6048 "Unexpected vecreduce opcode");
6049 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6050 ? TargetOpcode::G_FADD
6051 : TargetOpcode::G_FMUL;
6054 unsigned NumParts = SrcTy.getNumElements();
6057 for (
unsigned i = 0; i < NumParts; i++)
6058 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6062 MI.eraseFromParent();
6069 unsigned ScalarOpc) {
6077 while (SplitSrcs.
size() > 1) {
6079 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6087 SplitSrcs = std::move(PartialRdxs);
6091 MI.getOperand(1).setReg(SplitSrcs[0]);
6098 const LLT HalfTy,
const LLT AmtTy) {
6100 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6101 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6105 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6106 MI.eraseFromParent();
6112 unsigned VTBits = 2 * NVTBits;
6115 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6116 if (Amt.
ugt(VTBits)) {
6118 }
else if (Amt.
ugt(NVTBits)) {
6121 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6122 }
else if (Amt == NVTBits) {
6130 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6133 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6134 if (Amt.
ugt(VTBits)) {
6136 }
else if (Amt.
ugt(NVTBits)) {
6138 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6140 }
else if (Amt == NVTBits) {
6144 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6146 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6148 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6154 if (Amt.
ugt(VTBits)) {
6156 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6157 }
else if (Amt.
ugt(NVTBits)) {
6159 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6161 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6162 }
else if (Amt == NVTBits) {
6165 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6167 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6169 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6171 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6178 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6179 MI.eraseFromParent();
6195 LLT DstTy = MRI.getType(DstReg);
6200 LLT ShiftAmtTy = MRI.getType(Amt);
6202 if (DstEltSize % 2 != 0)
6218 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6229 const unsigned NewBitSize = DstEltSize / 2;
6241 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6243 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6244 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6247 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6248 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6250 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6255 switch (
MI.getOpcode()) {
6256 case TargetOpcode::G_SHL: {
6258 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6260 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6261 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6262 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6265 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6266 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6268 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6270 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6272 ResultRegs[0] =
Lo.getReg(0);
6273 ResultRegs[1] =
Hi.getReg(0);
6276 case TargetOpcode::G_LSHR:
6277 case TargetOpcode::G_ASHR: {
6279 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6281 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6282 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6283 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6287 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6290 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6291 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6293 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6297 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6299 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6301 ResultRegs[0] =
Lo.getReg(0);
6302 ResultRegs[1] =
Hi.getReg(0);
6309 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6310 MI.eraseFromParent();
6319 LLT TargetTy,
LLT ShiftAmtTy) {
6322 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6324 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6325 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6326 const bool NeedsInterWordShift = ShiftBits != 0;
6329 case TargetOpcode::G_SHL: {
6332 if (PartIdx < ShiftWords)
6335 unsigned SrcIdx = PartIdx - ShiftWords;
6336 if (!NeedsInterWordShift)
6337 return SrcParts[SrcIdx];
6342 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6346 return Hi.getReg(0);
6349 case TargetOpcode::G_LSHR: {
6350 unsigned SrcIdx = PartIdx + ShiftWords;
6351 if (SrcIdx >= NumParts)
6353 if (!NeedsInterWordShift)
6354 return SrcParts[SrcIdx];
6358 if (SrcIdx + 1 < NumParts) {
6359 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6363 return Lo.getReg(0);
6366 case TargetOpcode::G_ASHR: {
6368 unsigned SrcIdx = PartIdx + ShiftWords;
6369 if (SrcIdx >= NumParts)
6371 if (!NeedsInterWordShift)
6372 return SrcParts[SrcIdx];
6377 (SrcIdx == NumParts - 1)
6381 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6403 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6404 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6409 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6418 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6419 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6421 auto IsZeroBitShift =
6429 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6430 : TargetOpcode::G_SHL;
6433 auto TargetBitsConst =
6435 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6440 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6445 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6447 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6451 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6464 LLT DstTy = MRI.getType(DstReg);
6468 const unsigned NumParts = DstBits / TargetBits;
6470 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6480 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6481 MI.eraseFromParent();
6486 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6487 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6493 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6497 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6500 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6501 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6505 for (
unsigned I = 0;
I < NumParts; ++
I)
6507 Params, TargetTy, ShiftAmtTy);
6509 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6510 MI.eraseFromParent();
6519 LLT DstTy = MRI.getType(DstReg);
6520 LLT ShiftAmtTy = MRI.getType(AmtReg);
6524 const unsigned NumParts = DstBits / TargetBits;
6526 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6543 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6555 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6556 auto TargetBitsLog2Const =
6557 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6558 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6561 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6563 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6571 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6572 auto TargetBitsMinusOneConst =
6573 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6575 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6576 TargetBitsMinusOneConst)
6579 FillValue = ZeroReg;
6587 for (
unsigned I = 0;
I < NumParts; ++
I) {
6589 Register InBoundsResult = FillValue;
6599 for (
unsigned K = 0; K < NumParts; ++K) {
6600 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6602 WordShift, WordShiftKConst);
6614 switch (
MI.getOpcode()) {
6615 case TargetOpcode::G_SHL:
6616 MainSrcIdx = (int)
I - (
int)K;
6617 CarrySrcIdx = MainSrcIdx - 1;
6619 case TargetOpcode::G_LSHR:
6620 case TargetOpcode::G_ASHR:
6621 MainSrcIdx = (int)
I + (
int)K;
6622 CarrySrcIdx = MainSrcIdx + 1;
6630 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6631 Register MainOp = SrcParts[MainSrcIdx];
6635 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6636 CarryOp = SrcParts[CarrySrcIdx];
6637 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6638 CarrySrcIdx >= (
int)NumParts)
6639 CarryOp = FillValue;
6645 ResultForK = FillValue;
6651 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6658 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6662 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6663 MI.eraseFromParent();
6670 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6673 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6688 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6693 "getNeutralElementForVecReduce called with invalid opcode!");
6694 case TargetOpcode::G_VECREDUCE_ADD:
6695 case TargetOpcode::G_VECREDUCE_OR:
6696 case TargetOpcode::G_VECREDUCE_XOR:
6697 case TargetOpcode::G_VECREDUCE_UMAX:
6699 case TargetOpcode::G_VECREDUCE_MUL:
6701 case TargetOpcode::G_VECREDUCE_AND:
6702 case TargetOpcode::G_VECREDUCE_UMIN:
6705 case TargetOpcode::G_VECREDUCE_SMAX:
6708 case TargetOpcode::G_VECREDUCE_SMIN:
6711 case TargetOpcode::G_VECREDUCE_FADD:
6713 case TargetOpcode::G_VECREDUCE_FMUL:
6715 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6716 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6717 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6718 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6726 unsigned Opc =
MI.getOpcode();
6728 case TargetOpcode::G_IMPLICIT_DEF:
6729 case TargetOpcode::G_LOAD: {
6737 case TargetOpcode::G_STORE:
6744 case TargetOpcode::G_AND:
6745 case TargetOpcode::G_OR:
6746 case TargetOpcode::G_XOR:
6747 case TargetOpcode::G_ADD:
6748 case TargetOpcode::G_SUB:
6749 case TargetOpcode::G_MUL:
6750 case TargetOpcode::G_FADD:
6751 case TargetOpcode::G_FSUB:
6752 case TargetOpcode::G_FMUL:
6753 case TargetOpcode::G_FDIV:
6754 case TargetOpcode::G_FCOPYSIGN:
6755 case TargetOpcode::G_UADDSAT:
6756 case TargetOpcode::G_USUBSAT:
6757 case TargetOpcode::G_SADDSAT:
6758 case TargetOpcode::G_SSUBSAT:
6759 case TargetOpcode::G_SMIN:
6760 case TargetOpcode::G_SMAX:
6761 case TargetOpcode::G_UMIN:
6762 case TargetOpcode::G_UMAX:
6763 case TargetOpcode::G_FMINNUM:
6764 case TargetOpcode::G_FMAXNUM:
6765 case TargetOpcode::G_FMINNUM_IEEE:
6766 case TargetOpcode::G_FMAXNUM_IEEE:
6767 case TargetOpcode::G_FMINIMUM:
6768 case TargetOpcode::G_FMAXIMUM:
6769 case TargetOpcode::G_FMINIMUMNUM:
6770 case TargetOpcode::G_FMAXIMUMNUM:
6771 case TargetOpcode::G_STRICT_FADD:
6772 case TargetOpcode::G_STRICT_FSUB:
6773 case TargetOpcode::G_STRICT_FMUL: {
6781 case TargetOpcode::G_SHL:
6782 case TargetOpcode::G_ASHR:
6783 case TargetOpcode::G_LSHR: {
6789 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6795 case TargetOpcode::G_FMA:
6796 case TargetOpcode::G_STRICT_FMA:
6797 case TargetOpcode::G_FSHR:
6798 case TargetOpcode::G_FSHL: {
6807 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6808 case TargetOpcode::G_EXTRACT:
6815 case TargetOpcode::G_INSERT:
6816 case TargetOpcode::G_INSERT_VECTOR_ELT:
6817 case TargetOpcode::G_FREEZE:
6818 case TargetOpcode::G_FNEG:
6819 case TargetOpcode::G_FABS:
6820 case TargetOpcode::G_FSQRT:
6821 case TargetOpcode::G_FCEIL:
6822 case TargetOpcode::G_FFLOOR:
6823 case TargetOpcode::G_FNEARBYINT:
6824 case TargetOpcode::G_FRINT:
6825 case TargetOpcode::G_INTRINSIC_ROUND:
6826 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6827 case TargetOpcode::G_INTRINSIC_TRUNC:
6828 case TargetOpcode::G_BITREVERSE:
6829 case TargetOpcode::G_BSWAP:
6830 case TargetOpcode::G_FCANONICALIZE:
6831 case TargetOpcode::G_SEXT_INREG:
6832 case TargetOpcode::G_ABS:
6833 case TargetOpcode::G_CTLZ:
6834 case TargetOpcode::G_CTPOP:
6842 case TargetOpcode::G_SELECT: {
6843 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6845 if (!CondTy.isScalar() ||
6851 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6853 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6858 if (CondTy.isVector())
6868 case TargetOpcode::G_UNMERGE_VALUES:
6870 case TargetOpcode::G_PHI:
6872 case TargetOpcode::G_SHUFFLE_VECTOR:
6874 case TargetOpcode::G_BUILD_VECTOR: {
6876 for (
auto Op :
MI.uses()) {
6884 MIRBuilder.buildDeleteTrailingVectorElements(
6885 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6886 MI.eraseFromParent();
6889 case TargetOpcode::G_SEXT:
6890 case TargetOpcode::G_ZEXT:
6891 case TargetOpcode::G_ANYEXT:
6892 case TargetOpcode::G_TRUNC:
6893 case TargetOpcode::G_FPTRUNC:
6894 case TargetOpcode::G_FPEXT:
6895 case TargetOpcode::G_FPTOSI:
6896 case TargetOpcode::G_FPTOUI:
6897 case TargetOpcode::G_FPTOSI_SAT:
6898 case TargetOpcode::G_FPTOUI_SAT:
6899 case TargetOpcode::G_SITOFP:
6900 case TargetOpcode::G_UITOFP: {
6907 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6910 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6918 case TargetOpcode::G_ICMP:
6919 case TargetOpcode::G_FCMP: {
6927 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6932 case TargetOpcode::G_BITCAST: {
6936 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6937 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6953 case TargetOpcode::G_VECREDUCE_FADD:
6954 case TargetOpcode::G_VECREDUCE_FMUL:
6955 case TargetOpcode::G_VECREDUCE_ADD:
6956 case TargetOpcode::G_VECREDUCE_MUL:
6957 case TargetOpcode::G_VECREDUCE_AND:
6958 case TargetOpcode::G_VECREDUCE_OR:
6959 case TargetOpcode::G_VECREDUCE_XOR:
6960 case TargetOpcode::G_VECREDUCE_SMAX:
6961 case TargetOpcode::G_VECREDUCE_SMIN:
6962 case TargetOpcode::G_VECREDUCE_UMAX:
6963 case TargetOpcode::G_VECREDUCE_UMIN: {
6964 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6966 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6967 auto NeutralElement = getNeutralElementForVecReduce(
6973 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6974 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6975 NeutralElement, Idx);
6979 MO.
setReg(NewVec.getReg(0));
6991 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6993 unsigned MaskNumElts = Mask.size();
6994 unsigned SrcNumElts = SrcTy.getNumElements();
6997 if (MaskNumElts == SrcNumElts)
7000 if (MaskNumElts < SrcNumElts) {
7008 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7009 MI.getOperand(1).getReg(),
7010 MI.getOperand(2).getReg(), NewMask);
7011 MI.eraseFromParent();
7016 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
7017 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7026 MOps1[0] =
MI.getOperand(1).getReg();
7027 MOps2[0] =
MI.getOperand(2).getReg();
7029 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7030 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7034 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
7036 if (Idx >=
static_cast<int>(SrcNumElts))
7037 Idx += PaddedMaskNumElts - SrcNumElts;
7042 if (MaskNumElts != PaddedMaskNumElts) {
7044 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7047 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7049 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7054 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7057 MI.eraseFromParent();
7063 unsigned int TypeIdx,
LLT MoreTy) {
7064 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7066 unsigned NumElts = DstTy.getNumElements();
7069 if (DstTy.isVector() && Src1Ty.isVector() &&
7070 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7078 if (DstTy != Src1Ty || DstTy != Src2Ty)
7086 for (
unsigned I = 0;
I != NumElts; ++
I) {
7088 if (Idx <
static_cast<int>(NumElts))
7091 NewMask[
I] = Idx - NumElts + WidenNumElts;
7095 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7096 MI.getOperand(1).getReg(),
7097 MI.getOperand(2).getReg(), NewMask);
7098 MI.eraseFromParent();
7107 unsigned SrcParts = Src1Regs.
size();
7108 unsigned DstParts = DstRegs.
size();
7110 unsigned DstIdx = 0;
7112 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7113 DstRegs[DstIdx] = FactorSum;
7118 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7120 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7121 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7123 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7129 unsigned LowStart = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7130 unsigned LowEnd = std::min(DstIdx, SrcParts - 1);
7131 for (
unsigned RevI = LowEnd + 1; RevI != LowStart; --RevI) {
7132 unsigned i = RevI - 1;
7134 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7144 if (DstIdx != DstParts - 1) {
7145 MachineInstrBuilder Uaddo =
7146 B.buildUAddo(NarrowTy,
LLT::integer(1), Factors[0], Factors[1]);
7147 FactorSum = Uaddo.
getReg(0);
7148 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7149 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7150 MachineInstrBuilder Uaddo =
7151 B.buildUAddo(NarrowTy,
LLT::integer(1), FactorSum, Factors[i]);
7152 FactorSum = Uaddo.
getReg(0);
7153 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7154 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7158 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7159 for (
unsigned i = 2; i < Factors.
size(); ++i)
7160 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7163 CarrySumPrevDstIdx = CarrySum;
7164 DstRegs[DstIdx] = FactorSum;
7176 LLT DstType = MRI.getType(DstReg);
7178 if (DstType.isVector())
7181 unsigned Opcode =
MI.getOpcode();
7182 unsigned OpO, OpE, OpF;
7184 case TargetOpcode::G_SADDO:
7185 case TargetOpcode::G_SADDE:
7186 case TargetOpcode::G_UADDO:
7187 case TargetOpcode::G_UADDE:
7188 case TargetOpcode::G_ADD:
7189 OpO = TargetOpcode::G_UADDO;
7190 OpE = TargetOpcode::G_UADDE;
7191 OpF = TargetOpcode::G_UADDE;
7192 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7193 OpF = TargetOpcode::G_SADDE;
7195 case TargetOpcode::G_SSUBO:
7196 case TargetOpcode::G_SSUBE:
7197 case TargetOpcode::G_USUBO:
7198 case TargetOpcode::G_USUBE:
7199 case TargetOpcode::G_SUB:
7200 OpO = TargetOpcode::G_USUBO;
7201 OpE = TargetOpcode::G_USUBE;
7202 OpF = TargetOpcode::G_USUBE;
7203 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7204 OpF = TargetOpcode::G_SSUBE;
7211 unsigned NumDefs =
MI.getNumExplicitDefs();
7212 Register Src1 =
MI.getOperand(NumDefs).getReg();
7213 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7216 CarryDst =
MI.getOperand(1).getReg();
7217 if (
MI.getNumOperands() == NumDefs + 3)
7218 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7220 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7221 LLT LeftoverTy, DummyTy;
7223 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7228 int NarrowParts = Src1Regs.
size();
7229 Src1Regs.
append(Src1Left);
7230 Src2Regs.
append(Src2Left);
7233 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7235 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7238 if (i == e - 1 && CarryDst)
7239 CarryOut = CarryDst;
7241 CarryOut = MRI.createGenericVirtualRegister(
LLT::integer(1));
7244 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7245 {Src1Regs[i], Src2Regs[i]});
7246 }
else if (i == e - 1) {
7247 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7248 {Src1Regs[i], Src2Regs[i], CarryIn});
7250 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7251 {Src1Regs[i], Src2Regs[i], CarryIn});
7257 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7258 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7259 ArrayRef(DstRegs).drop_front(NarrowParts));
7261 MI.eraseFromParent();
7267 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7269 LLT Ty = MRI.getType(DstReg);
7273 unsigned Size = Ty.getSizeInBits();
7275 if (
Size % NarrowSize != 0)
7278 unsigned NumParts =
Size / NarrowSize;
7279 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7280 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7286 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7290 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7291 MI.eraseFromParent();
7301 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7304 LLT SrcTy = MRI.getType(Src);
7315 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7328 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7331 if (SizeOp1 % NarrowSize != 0)
7333 int NumParts = SizeOp1 / NarrowSize;
7336 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7340 uint64_t OpStart =
MI.getOperand(2).getImm();
7341 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7342 for (
int i = 0; i < NumParts; ++i) {
7343 unsigned SrcStart = i * NarrowSize;
7345 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7348 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7356 int64_t ExtractOffset;
7358 if (OpStart < SrcStart) {
7360 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7362 ExtractOffset = OpStart - SrcStart;
7363 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7367 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7369 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7370 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7377 if (MRI.getType(DstReg).isVector())
7378 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7379 else if (DstRegs.
size() > 1)
7380 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7383 MI.eraseFromParent();
7395 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7397 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7400 SrcRegs.
append(LeftoverRegs);
7404 uint64_t OpStart =
MI.getOperand(3).getImm();
7405 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7406 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7407 unsigned DstStart =
I * NarrowSize;
7409 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7417 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7419 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7423 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7431 int64_t ExtractOffset, InsertOffset;
7433 if (OpStart < DstStart) {
7435 ExtractOffset = DstStart - OpStart;
7436 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7438 InsertOffset = OpStart - DstStart;
7441 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7445 if (ExtractOffset != 0 || SegSize != OpSize) {
7447 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7448 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7451 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7452 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7460 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7463 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7465 MI.eraseFromParent();
7473 LLT DstTy = MRI.getType(DstReg);
7475 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7481 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7482 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7486 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7487 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7490 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7491 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7492 {Src0Regs[I], Src1Regs[I]});
7496 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7499 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7500 DstLeftoverRegs.
push_back(Inst.getReg(0));
7503 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7504 LeftoverTy, DstLeftoverRegs);
7506 MI.eraseFromParent();
7516 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7518 LLT DstTy = MRI.getType(DstReg);
7523 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7524 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7525 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7527 MI.eraseFromParent();
7537 Register CondReg =
MI.getOperand(1).getReg();
7538 LLT CondTy = MRI.getType(CondReg);
7539 if (CondTy.isVector())
7543 LLT DstTy = MRI.getType(DstReg);
7549 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7550 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7554 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7555 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7558 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7560 CondReg, Src1Regs[
I], Src2Regs[
I]);
7564 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7566 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7570 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7571 LeftoverTy, DstLeftoverRegs);
7573 MI.eraseFromParent();
7583 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7586 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7587 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7590 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7592 auto C_0 =
B.buildConstant(NarrowTy, 0);
7594 UnmergeSrc.getReg(1), C_0);
7595 auto LoCTLZ = IsUndef ?
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0))
7596 :
B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7597 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7598 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7599 auto HiCTLZ =
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1));
7600 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7602 MI.eraseFromParent();
7615 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7618 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7619 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7622 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7624 auto C_0 =
B.buildConstant(NarrowTy, 0);
7626 UnmergeSrc.getReg(0), C_0);
7627 auto HiCTTZ = IsUndef ?
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1))
7628 :
B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7629 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7630 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7631 auto LoCTTZ =
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0));
7632 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7634 MI.eraseFromParent();
7647 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7650 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7655 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7659 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7660 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7668 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7669 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7672 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7673 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7675 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7677 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7679 MI.eraseFromParent();
7689 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7692 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7693 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7695 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7696 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7697 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7699 MI.eraseFromParent();
7714 LLT ExpTy = MRI.getType(ExpReg);
7719 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7720 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7721 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7722 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7724 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7726 MI.getOperand(2).setReg(Trunc.getReg(0));
7733 unsigned Opc =
MI.getOpcode();
7736 auto QAction = LI.getAction(Q).Action;
7742 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7745 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7749 case TargetOpcode::G_CTLZ: {
7750 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7751 unsigned Len = SrcTy.getScalarSizeInBits();
7753 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7755 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_POISON(DstTy, SrcReg);
7756 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7759 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7760 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7761 MI.eraseFromParent();
7777 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7778 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7781 Op = MIBOp.getReg(0);
7786 MI.eraseFromParent();
7789 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7792 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7796 case TargetOpcode::G_CTTZ: {
7797 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7799 unsigned Len = SrcTy.getScalarSizeInBits();
7800 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7803 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_POISON(DstTy, SrcReg);
7804 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7807 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7808 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7809 MI.eraseFromParent();
7816 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7817 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7819 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7820 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7821 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7822 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7825 MI.eraseFromParent();
7829 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7830 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7834 case TargetOpcode::G_CTPOP: {
7836 LLT Ty = MRI.getType(SrcReg);
7837 unsigned Size = Ty.getScalarSizeInBits();
7849 auto C_1 =
B.buildConstant(Ty, 1);
7850 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7852 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7853 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7854 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7858 auto C_2 =
B.buildConstant(Ty, 2);
7859 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7861 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7862 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7863 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7864 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7871 auto C_4 =
B.buildConstant(Ty, 4);
7872 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7873 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7875 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7876 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7878 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7881 if (
Size == 16 && !Ty.isVector()) {
7883 auto C_8 =
B.buildConstant(Ty, 8);
7884 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7885 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7886 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7887 MI.eraseFromParent();
7896 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7898 auto IsMulSupported = [
this](
const LLT Ty) {
7899 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7902 if (IsMulSupported(Ty)) {
7903 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7904 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7906 auto ResTmp = B8Count;
7907 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7908 auto ShiftC =
B.buildConstant(Ty, Shift);
7909 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7910 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7912 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7914 MI.eraseFromParent();
7917 case TargetOpcode::G_CTLS: {
7918 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7922 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7923 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7925 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7931 MI.eraseFromParent();
7952 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7953 LLT Ty = MRI.getType(Dst);
7954 LLT ShTy = MRI.getType(Z);
7961 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7962 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7967 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7968 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7972 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7985 MI.eraseFromParent();
7991 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7992 LLT Ty = MRI.getType(Dst);
7993 LLT ShTy = MRI.getType(Z);
7996 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8006 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8007 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8008 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8009 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8010 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8014 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
8017 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8020 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8022 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8023 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8024 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8027 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8029 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
8031 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8034 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8035 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8040 MI.eraseFromParent();
8051 LLT Ty = MRI.getType(Dst);
8052 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8054 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8055 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8058 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8059 return lowerFunnelShiftAsShifts(
MI);
8063 if (Result == UnableToLegalize)
8064 return lowerFunnelShiftAsShifts(
MI);
8069 auto [Dst, Src] =
MI.getFirst2Regs();
8070 LLT DstTy = MRI.getType(Dst);
8071 LLT SrcTy = MRI.getType(Src);
8075 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8083 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8087 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8091 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8096 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8097 {UnmergeSrc.getReg(0)});
8098 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8099 {UnmergeSrc.getReg(1)});
8102 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8104 MI.eraseFromParent();
8121 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8125 LLT DstTy = MRI.getType(DstReg);
8126 LLT SrcTy = MRI.getType(SrcReg);
8134 SrcTy.getElementCount().divideCoefficientBy(2));
8147 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8159 MI.eraseFromParent();
8168 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8169 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8170 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8171 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8172 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8173 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8174 MI.eraseFromParent();
8179 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8181 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8182 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8187 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8188 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8190 return lowerRotateWithReverseRotate(
MI);
8193 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8194 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8195 bool IsFShLegal =
false;
8196 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8197 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8201 MI.eraseFromParent();
8206 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8209 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8214 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8215 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8216 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8222 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8223 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8225 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8231 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8232 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8234 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8236 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8241 MI.eraseFromParent();
8249 auto [Dst, Src] =
MI.getFirst2Regs();
8254 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8282 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8295 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8299 MI.eraseFromParent();
8307 auto [Dst, Src] =
MI.getFirst2Regs();
8312 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8325 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8327 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8332 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8334 MI.eraseFromParent();
8342 auto [Dst, Src] =
MI.getFirst2Regs();
8346 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8357 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8358 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8360 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8367 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8368 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8369 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8371 MI.eraseFromParent();
8382 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8383 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8389 MI.eraseFromParent();
8394 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8397 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8398 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8399 MIRBuilder.buildSelect(Dst, Src, True, False);
8400 MI.eraseFromParent();
8404 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8424 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8431 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8432 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8433 MIRBuilder.buildSelect(Dst, Src, True, False);
8434 MI.eraseFromParent();
8438 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8444 if (DstTy.getScalarSizeInBits() == 32) {
8451 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8452 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8454 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8461 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8462 MI.eraseFromParent();
8470 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8474 if (SrcTy !=
S64 && SrcTy !=
S32)
8476 if (DstTy !=
S32 && DstTy !=
S64)
8503 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8505 MI.eraseFromParent();
8510 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8515 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8522 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8524 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8525 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8527 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8528 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8530 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8532 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8533 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8534 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8537 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8538 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8539 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8541 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8544 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8549 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8550 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8556 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8558 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8559 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8561 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8566 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8567 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8569 MI.eraseFromParent();
8575 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8577 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8578 unsigned SatWidth = DstTy.getScalarSizeInBits();
8582 APInt MinInt, MaxInt;
8605 if (AreExactFloatBounds) {
8607 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8610 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8612 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8621 MI.eraseFromParent();
8626 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8631 MI.eraseFromParent();
8638 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8646 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8656 MI.eraseFromParent();
8662 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8666 MI.eraseFromParent();
8673 assert((
MI.getOpcode() == TargetOpcode::G_FPEXT ||
8674 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8675 "Only G_FPEXT and G_FPTRUNC are expected");
8677 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8682 if (
MI.getOpcode() == TargetOpcode::G_FPEXT) {
8684 StoreOpc = TargetOpcode::G_STORE;
8685 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8688 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8689 LoadOpc = TargetOpcode::G_LOAD;
8698 StackTy, StackTyAlign);
8699 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8702 StackTy, StackTyAlign);
8703 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8705 MI.eraseFromParent();
8715 auto [Dst, Src] =
MI.getFirst2Regs();
8719 if (MRI.getType(Src).isVector())
8723 unsigned Flags =
MI.getFlags();
8726 MI.eraseFromParent();
8730 const unsigned ExpMask = 0x7ff;
8731 const unsigned ExpBiasf64 = 1023;
8732 const unsigned ExpBiasf16 = 15;
8761 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8821 MI.eraseFromParent();
8828 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8838 auto SrcI =
MIRBuilder.buildBitcast(I32Ty, SrcReg);
8860 auto Trunc =
MIRBuilder.buildTrunc(I16Ty, Srl);
8862 MI.eraseFromParent();
8868 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8869 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8872 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8879 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8880 LLT Ty = MRI.getType(Dst);
8882 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8883 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8884 MI.eraseFromParent();
8889 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8890 LLT Ty = MRI.getType(Src);
8891 auto Flags =
MI.getFlags();
8899 FracToUse = FracPart.getReg(0);
8901 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8905 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8907 FracToUse =
Select.getReg(0);
8910 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8913 MI.eraseFromParent();
8919 case TargetOpcode::G_SMIN:
8921 case TargetOpcode::G_SMAX:
8923 case TargetOpcode::G_UMIN:
8925 case TargetOpcode::G_UMAX:
8933 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8938 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8939 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8941 MI.eraseFromParent();
8950 LLT DstTy = MRI.getType(Dst);
8951 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8961 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8962 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8964 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8967 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8968 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8969 if (TLI.preferSelectsOverBooleanArithmetic(
8972 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8973 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8975 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8976 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8982 unsigned BoolExtOp =
8984 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8985 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8989 MI.eraseFromParent();
8995 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8996 const int Src0Size = Src0Ty.getScalarSizeInBits();
8997 const int Src1Size = Src1Ty.getScalarSizeInBits();
9007 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9008 Src0Ty.getScalarType().isInteger()))
9009 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9011 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9012 Src1Ty.getScalarType().isInteger()))
9013 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9018 auto NotSignBitMask =
MIRBuilder.buildConstant(
9022 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9024 if (Src0Ty == Src1Ty) {
9025 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9026 }
else if (Src0Size > Src1Size) {
9027 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9028 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9029 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
9030 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
9032 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9033 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9034 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
9035 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9041 unsigned Flags =
MI.getFlags();
9046 if (DstTy == DstIntTy)
9047 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9053 MI.eraseFromParent();
9064 switch (
MI.getOpcode()) {
9065 case TargetOpcode::G_FMINNUM:
9066 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9068 case TargetOpcode::G_FMINIMUMNUM:
9069 NewOp = TargetOpcode::G_FMINNUM;
9071 case TargetOpcode::G_FMAXNUM:
9072 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9074 case TargetOpcode::G_FMAXIMUMNUM:
9075 NewOp = TargetOpcode::G_FMAXNUM;
9081 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9082 LLT Ty = MRI.getType(Dst);
9091 if (!VT->isKnownNeverSNaN(Src0))
9092 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
9094 if (!VT->isKnownNeverSNaN(Src1))
9095 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
9100 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
9101 MI.eraseFromParent();
9107 unsigned Opc =
MI.getOpcode();
9108 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9109 LLT Ty = MRI.getType(Dst);
9112 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
9114 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9115 unsigned OpcNonIeee =
9116 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9117 bool MinMaxMustRespectOrderedZero =
false;
9121 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9123 MinMaxMustRespectOrderedZero =
true;
9124 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9129 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9134 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9137 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9141 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9143 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9153 const unsigned Flags =
MI.getFlags();
9159 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9161 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9163 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9165 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9167 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9172 MI.eraseFromParent();
9179 LLT Ty = MRI.getType(DstReg);
9180 unsigned Flags =
MI.getFlags();
9185 MI.eraseFromParent();
9191 auto [DstReg,
X] =
MI.getFirst2Regs();
9192 const unsigned Flags =
MI.getFlags();
9193 const LLT Ty = MRI.getType(DstReg);
9205 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9207 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9212 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9213 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9214 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9215 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9217 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9219 MI.eraseFromParent();
9224 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9225 unsigned Flags =
MI.getFlags();
9226 LLT Ty = MRI.getType(DstReg);
9233 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9234 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9237 SrcReg, Zero, Flags);
9239 SrcReg, Trunc, Flags);
9243 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9244 MI.eraseFromParent();
9250 const unsigned NumOps =
MI.getNumOperands();
9251 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9252 unsigned PartSize = Src0Ty.getSizeInBits();
9257 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9258 const unsigned Offset = (
I - 1) * PartSize;
9261 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9264 MRI.createGenericVirtualRegister(WideTy);
9267 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9268 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9269 ResultReg = NextResult;
9272 if (DstTy.isPointer()) {
9273 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9274 DstTy.getAddressSpace())) {
9282 MI.eraseFromParent();
9288 const unsigned NumDst =
MI.getNumOperands() - 1;
9289 Register SrcReg =
MI.getOperand(NumDst).getReg();
9290 Register Dst0Reg =
MI.getOperand(0).getReg();
9291 LLT DstTy = MRI.getType(Dst0Reg);
9300 LLT IntTy = MRI.getType(SrcReg);
9305 unsigned Offset = DstSize;
9306 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9308 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9312 MI.eraseFromParent();
9331 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9332 InsertVal =
MI.getOperand(2).getReg();
9334 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9336 LLT VecTy = MRI.getType(SrcVec);
9346 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9347 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9349 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9352 MI.eraseFromParent();
9357 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9368 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9375 int64_t
Offset = IdxVal * EltBytes;
9386 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9389 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9391 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9394 MI.eraseFromParent();
9400 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9401 MI.getFirst3RegLLTs();
9411 for (
int Idx : Mask) {
9413 if (!
Undef.isValid())
9419 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9421 int NumElts = Src0Ty.getNumElements();
9422 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9423 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9424 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9426 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9428 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9433 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9434 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9435 MI.eraseFromParent();
9441 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9442 MI.getFirst4RegLLTs();
9444 if (VecTy.isScalableVector())
9460 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9463 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9466 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9469 std::optional<APInt> PassthruSplatVal =
9472 if (PassthruSplatVal.has_value()) {
9474 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9475 }
else if (HasPassthru) {
9476 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9477 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9483 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9487 unsigned NumElmts = VecTy.getNumElements();
9488 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9490 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9493 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9496 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9501 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9503 if (HasPassthru &&
I == NumElmts - 1) {
9506 auto AllLanesSelected =
MIRBuilder.buildICmp(
9508 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9509 {OutPos, EndOfVector});
9513 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9515 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9520 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9522 MI.eraseFromParent();
9533 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9539 if (Alignment >
Align(1)) {
9542 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9551 const auto &MF = *
MI.getMF();
9557 Register AllocSize =
MI.getOperand(1).getReg();
9560 LLT PtrTy = MRI.getType(Dst);
9561 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9568 MI.eraseFromParent();
9574 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9579 MI.eraseFromParent();
9585 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9590 MI.eraseFromParent();
9596 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9597 unsigned Offset =
MI.getOperand(2).getImm();
9600 if (SrcTy.isVector()) {
9601 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9602 unsigned DstSize = DstTy.getSizeInBits();
9604 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9605 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9607 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9611 for (
unsigned Idx =
Offset / SrcEltSize;
9612 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9613 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9615 if (SubVectorElts.
size() == 1)
9616 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9618 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9620 MI.eraseFromParent();
9626 if ((SrcTy.isPointer() &&
9627 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9628 (DstTy.isPointer() &&
9629 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9630 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9634 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9635 (SrcTy.isScalar() || SrcTy.isPointer() ||
9636 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9637 LLT SrcIntTy = SrcTy;
9638 if (!SrcTy.isScalar()) {
9640 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9644 if (DstTy.isPointer())
9646 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9652 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9656 if (DstTy.isPointer())
9659 MI.eraseFromParent();
9667 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9670 LLT DstTy = MRI.getType(Src);
9671 LLT InsertTy = MRI.getType(InsertSrc);
9674 bool IsNonIntegralInsert =
9684 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9685 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9692 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9694 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9698 for (; Idx <
Offset / EltSize; ++Idx) {
9699 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9704 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9705 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9707 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9711 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9713 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9720 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9723 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9724 MI.eraseFromParent();
9733 if (IsNonIntegralDst || IsNonIntegralInsert) {
9734 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9738 LLT IntDstTy = DstTy;
9742 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9747 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9753 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9759 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9760 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9761 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9764 MI.eraseFromParent();
9770 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9771 MI.getFirst4RegLLTs();
9772 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9775 LLT BoolTy = Dst1Ty;
9777 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9792 auto ResultLowerThanLHS =
9796 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9800 auto LHSLessThanRHS =
9802 auto ResultNegative =
9804 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9808 MI.eraseFromParent();
9814 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9815 const LLT Ty = MRI.getType(Res);
9818 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9819 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9820 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9831 MI.eraseFromParent();
9836 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9837 const LLT Ty = MRI.getType(Res);
9840 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9841 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9842 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9847 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9852 MI.eraseFromParent();
9858 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9859 LLT Ty = MRI.getType(Res);
9863 switch (
MI.getOpcode()) {
9866 case TargetOpcode::G_UADDSAT:
9869 BaseOp = TargetOpcode::G_ADD;
9871 case TargetOpcode::G_SADDSAT:
9874 BaseOp = TargetOpcode::G_ADD;
9876 case TargetOpcode::G_USUBSAT:
9879 BaseOp = TargetOpcode::G_SUB;
9881 case TargetOpcode::G_SSUBSAT:
9884 BaseOp = TargetOpcode::G_SUB;
9899 uint64_t NumBits = Ty.getScalarSizeInBits();
9910 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9918 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9923 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9924 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9927 MI.eraseFromParent();
9933 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9934 LLT Ty = MRI.getType(Res);
9938 unsigned OverflowOp;
9939 switch (
MI.getOpcode()) {
9942 case TargetOpcode::G_UADDSAT:
9945 OverflowOp = TargetOpcode::G_UADDO;
9947 case TargetOpcode::G_SADDSAT:
9950 OverflowOp = TargetOpcode::G_SADDO;
9952 case TargetOpcode::G_USUBSAT:
9955 OverflowOp = TargetOpcode::G_USUBO;
9957 case TargetOpcode::G_SSUBSAT:
9960 OverflowOp = TargetOpcode::G_SSUBO;
9965 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9966 Register Tmp = OverflowRes.getReg(0);
9967 Register Ov = OverflowRes.getReg(1);
9976 uint64_t NumBits = Ty.getScalarSizeInBits();
9977 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9978 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9981 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9989 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9993 MI.eraseFromParent();
9999 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
10000 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
10001 "Expected shlsat opcode!");
10002 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10003 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
10004 LLT Ty = MRI.getType(Res);
10008 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
10009 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
10018 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10023 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10025 MI.eraseFromParent();
10030 auto [Dst, Src] =
MI.getFirst2Regs();
10031 const LLT Ty = MRI.getType(Src);
10032 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10033 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10036 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10037 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10038 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10039 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10042 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
10044 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
10045 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
10046 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10048 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
10049 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10050 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10052 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10053 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10054 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10056 Res.getInstr()->getOperand(0).setReg(Dst);
10058 MI.eraseFromParent();
10065 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
10068 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10069 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10070 return B.buildOr(Dst,
LHS,
RHS);
10075 auto [Dst, Src] =
MI.getFirst2Regs();
10076 const LLT SrcTy = MRI.getType(Src);
10077 unsigned Size = SrcTy.getScalarSizeInBits();
10078 unsigned VSize = SrcTy.getSizeInBits();
10081 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10082 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10083 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10084 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10089 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
10090 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
10091 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
10095 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10118 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
10122 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
10125 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10129 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10133 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10138 MI.eraseFromParent();
10146 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10147 int NameOpIdx = IsRead ? 1 : 0;
10148 int ValRegIndex = IsRead ? 0 : 1;
10150 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10151 const LLT Ty = MRI.getType(ValReg);
10153 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10160 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10161 Fn,
MI.getDebugLoc()));
10165 MI.eraseFromParent();
10174 MI.eraseFromParent();
10180 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10181 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10182 Register Result =
MI.getOperand(0).getReg();
10183 LLT OrigTy = MRI.getType(Result);
10187 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10188 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10190 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10192 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10193 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10196 MI.eraseFromParent();
10202 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10207 MI.eraseFromParent();
10212 MI.eraseFromParent();
10219 unsigned BitSize = SrcTy.getScalarSizeInBits();
10223 auto AsInt = SrcTy == IntTy ?
MIRBuilder.buildCopy(IntTy, SrcReg)
10230 APInt ExpMask = Inf;
10232 APInt QNaNBitMask =
10236 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10237 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10238 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10239 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10240 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10242 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10246 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10248 LLT DstTyCopy = DstTy;
10250 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10278 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10281 Mask &= ~PartialCheck;
10290 else if (PartialCheck ==
fcZero)
10302 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10303 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10304 auto SubnormalRes =
10306 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10308 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10309 appendToRes(SubnormalRes);
10316 else if (PartialCheck ==
fcInf)
10321 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10328 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10329 if (PartialCheck ==
fcNan) {
10333 }
else if (PartialCheck ==
fcQNan) {
10343 Abs, InfWithQnanBitC);
10344 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10351 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10353 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10354 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10357 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10359 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10362 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10363 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10365 appendToRes(NormalRes);
10369 MI.eraseFromParent();
10375 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10376 MI.getFirst4RegLLTs();
10385 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10386 Op1Ty = MRI.getType(Op1Reg);
10387 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10388 Op2Ty = MRI.getType(Op2Reg);
10392 if (MaskTy.isScalar()) {
10400 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10403 MaskTy = DstTy.changeElementType(
LLT::integer(DstTy.getScalarSizeInBits()));
10405 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10407 if (DstTy.isVector()) {
10409 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10410 MaskReg = ShufSplat.getReg(0);
10414 }
else if (!DstTy.isVector()) {
10419 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10423 if (!Op1Ty.getScalarType().isAnyScalar() &&
10424 !Op1Ty.getScalarType().isInteger())
10425 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10427 if (!Op2Ty.getScalarType().isAnyScalar() &&
10428 !Op2Ty.getScalarType().isInteger()) {
10430 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10431 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10434 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10435 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10436 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10441 if (DstTy == Op1TyInt)
10444 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10448 MI.eraseFromParent();
10454 unsigned Opcode =
MI.getOpcode();
10457 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10458 : TargetOpcode::G_UDIV,
10459 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10461 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10462 : TargetOpcode::G_UREM,
10463 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10464 MI.eraseFromParent();
10474 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10478 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10481 MI.eraseFromParent();
10491 Register SrcReg =
MI.getOperand(1).getReg();
10492 LLT Ty = MRI.getType(SrcReg);
10493 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10496 MI.eraseFromParent();
10502 Register SrcReg =
MI.getOperand(1).getReg();
10503 Register DestReg =
MI.getOperand(0).getReg();
10505 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10506 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10509 MI.eraseFromParent();
10515 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10516 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10517 "Expected G_ABDS or G_ABDU instruction");
10519 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10520 LLT Ty = MRI.getType(LHS);
10530 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10532 MI.eraseFromParent();
10538 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10539 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10540 "Expected G_ABDS or G_ABDU instruction");
10542 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10543 LLT Ty = MRI.getType(LHS);
10548 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10549 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10550 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10552 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10553 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10555 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10557 MI.eraseFromParent();
10562 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10567 if (!(SrcTy.getScalarType().isAnyScalar() ||
10568 SrcTy.getScalarType().isInteger())) {
10570 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10571 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10574 if (MRI.getType(DstReg) != TyInt) {
10578 .buildAnd(TyInt, CastedSrc,
10581 DstTy.getScalarSizeInBits())))
10593 MI.eraseFromParent();
10599 Register SrcReg =
MI.getOperand(1).getReg();
10600 LLT SrcTy = MRI.getType(SrcReg);
10601 LLT DstTy = MRI.getType(SrcReg);
10604 if (SrcTy.isScalar()) {
10609 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10620 Register ListPtr =
MI.getOperand(1).getReg();
10621 LLT PtrTy = MRI.getType(ListPtr);
10628 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10630 const Align A(
MI.getOperand(2).getImm());
10632 if (
A > TLI.getMinStackArgumentAlignment()) {
10634 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10635 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10636 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10637 VAList = AndDst.getReg(0);
10644 LLT LLTTy = MRI.getType(Dst);
10647 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10648 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10653 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10655 Align EltAlignment =
DL.getABITypeAlign(Ty);
10658 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10660 MI.eraseFromParent();
10665 [[maybe_unused]]
unsigned OpCode =
MI.getOpcode();
10666 assert((OpCode == TargetOpcode::G_SMULFIX ||
10667 OpCode == TargetOpcode::G_UMULFIX) &&
10668 "Operator must be either G_SMULFIX or G_UMULFIX!");
10669 auto [Dst, LHS, RHS] =
MI.getFirst3Regs();
10670 LLT Ty = MRI.getType(Dst);
10671 unsigned Scale =
MI.getOperand(3).getImm();
10675 MI.eraseFromParent();
10681 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, Scale);
10683 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10692 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX)
10699 MI.eraseFromParent();
10714 unsigned Limit,
const MemOp &
Op,
10715 unsigned DstAS,
unsigned SrcAS,
10716 const AttributeList &FuncAttributes,
10718 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10728 if (
Op.isFixedDstAlign())
10729 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10732 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10736 unsigned NumMemOps = 0;
10739 unsigned TySize = Ty.getSizeInBytes();
10740 while (TySize >
Size) {
10750 assert(NewTySize > 0 &&
"Could not find appropriate type");
10757 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10759 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10765 TySize = NewTySize;
10769 if (++NumMemOps > Limit)
10772 MemOps.push_back(Ty);
10782 unsigned NumBits = Ty.getScalarSizeInBits();
10784 if (!Ty.isVector() && ValVRegAndVal) {
10785 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10793 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10814 uint64_t KnownLen,
Align Alignment,
10816 auto &MF = *
MI.getParent()->getParent();
10821 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10823 bool DstAlignCanChange =
false;
10827 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10829 DstAlignCanChange =
true;
10831 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10832 std::vector<LLT> MemOps;
10834 const auto &DstMMO = **
MI.memoperands_begin();
10835 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10838 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10849 if (DstAlignCanChange) {
10852 Align NewAlign =
DL.getABITypeAlign(IRTy);
10853 if (NewAlign > Alignment) {
10854 Alignment = NewAlign;
10862 MachineIRBuilder MIB(
MI);
10864 LLT LargestTy = MemOps[0];
10865 for (
unsigned i = 1; i < MemOps.size(); i++)
10867 LargestTy = MemOps[i];
10879 LLT PtrTy = MRI.getType(Dst);
10880 unsigned DstOff = 0;
10881 unsigned Size = KnownLen;
10882 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10883 LLT Ty = MemOps[
I];
10885 if (TySize >
Size) {
10888 assert(
I == MemOps.size() - 1 &&
I != 0);
10889 DstOff -= TySize -
Size;
10899 TLI.isTruncateFree(LargestVT, VT))
10900 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10913 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10916 MIB.buildStore(
Value, Ptr, *StoreMMO);
10921 MI.eraseFromParent();
10927 uint64_t KnownLen, uint64_t Limit,
Align DstAlign,
10928 Align SrcAlign,
bool IsVolatile) {
10929 auto &MF = *
MI.getParent()->getParent();
10934 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10936 bool DstAlignCanChange =
false;
10938 Align Alignment = std::min(DstAlign, SrcAlign);
10940 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10942 DstAlignCanChange =
true;
10948 std::vector<LLT> MemOps;
10950 const auto &DstMMO = **
MI.memoperands_begin();
10951 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10952 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10953 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10957 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10963 if (DstAlignCanChange) {
10966 Align NewAlign =
DL.getABITypeAlign(IRTy);
10971 if (!
TRI->hasStackRealignment(MF))
10972 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10973 NewAlign = std::min(NewAlign, *StackAlign);
10975 if (NewAlign > Alignment) {
10976 Alignment = NewAlign;
10984 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10986 MachineIRBuilder MIB(
MI);
10992 unsigned CurrOffset = 0;
10993 unsigned Size = KnownLen;
10994 for (
auto CopyTy : MemOps) {
10997 if (CopyTy.getSizeInBytes() >
Size)
10998 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
11009 if (CurrOffset != 0) {
11010 LLT SrcTy = MRI.getType(Src);
11014 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11016 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11020 if (CurrOffset != 0) {
11021 LLT DstTy = MRI.getType(Dst);
11022 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11024 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11025 CurrOffset += CopyTy.getSizeInBytes();
11026 Size -= CopyTy.getSizeInBytes();
11029 MI.eraseFromParent();
11035 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
11037 auto &MF = *
MI.getParent()->getParent();
11042 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
11044 bool DstAlignCanChange =
false;
11047 Align Alignment = std::min(DstAlign, SrcAlign);
11049 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11051 DstAlignCanChange =
true;
11053 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11054 std::vector<LLT> MemOps;
11056 const auto &DstMMO = **
MI.memoperands_begin();
11057 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
11058 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11059 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11066 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11072 if (DstAlignCanChange) {
11075 Align NewAlign =
DL.getABITypeAlign(IRTy);
11080 if (!
TRI->hasStackRealignment(MF))
11081 if (MaybeAlign StackAlign =
DL.getStackAlignment())
11082 NewAlign = std::min(NewAlign, *StackAlign);
11084 if (NewAlign > Alignment) {
11085 Alignment = NewAlign;
11093 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
11095 MachineIRBuilder MIB(
MI);
11099 unsigned CurrOffset = 0;
11100 SmallVector<Register, 16> LoadVals;
11101 for (
auto CopyTy : MemOps) {
11108 if (CurrOffset != 0) {
11109 LLT SrcTy = MRI.getType(Src);
11112 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11114 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11115 CurrOffset += CopyTy.getSizeInBytes();
11119 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
11120 LLT CopyTy = MemOps[
I];
11126 if (CurrOffset != 0) {
11127 LLT DstTy = MRI.getType(Dst);
11130 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11132 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11135 MI.eraseFromParent();
11141 const unsigned Opc =
MI.getOpcode();
11144 assert((
Opc == TargetOpcode::G_MEMCPY ||
11145 Opc == TargetOpcode::G_MEMCPY_INLINE ||
11146 Opc == TargetOpcode::G_MEMMOVE ||
Opc == TargetOpcode::G_MEMSET) &&
11147 "Expected memcpy like instruction");
11149 auto MMOIt =
MI.memoperands_begin();
11154 auto [Dst, Src, Len] =
MI.getFirst3Regs();
11156 if (
Opc != TargetOpcode::G_MEMSET) {
11157 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
11158 MemOp = *(++MMOIt);
11159 SrcAlign =
MemOp->getBaseAlign();
11164 if (!LenVRegAndVal) {
11166 assert(
Opc != TargetOpcode::G_MEMCPY_INLINE &&
11167 "inline memcpy with dynamic size is not yet supported");
11170 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11172 if (KnownLen == 0) {
11173 MI.eraseFromParent();
11177 if (
Opc != TargetOpcode::G_MEMCPY_INLINE && MaxLen && KnownLen > MaxLen)
11180 bool IsVolatile =
MemOp->isVolatile();
11181 if (
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMCPY_INLINE) {
11182 auto &MF = *
MI.getParent()->getParent();
11185 uint64_t Limit =
Opc == TargetOpcode::G_MEMCPY_INLINE
11186 ? std::numeric_limits<uint64_t>::max()
11187 : TLI.getMaxStoresPerMemcpy(OptSize);
11188 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11191 if (
Opc == TargetOpcode::G_MEMMOVE)
11192 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11193 if (
Opc == TargetOpcode::G_MEMSET)
11194 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetReg at OffsetBits, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a three-way compare.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMulfix(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.