// LLVM 23.0.0git
// ARMFrameLowering.cpp
// (doxygen navigation text: "Go to the documentation of this file.")
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
147#include "llvm/Support/Debug.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
// Hidden command-line flag (-align-neon-spills), default true: requests that
// spills of the NEON callee-saved D registers in the prologue/epilogue be
// aligned (see cl::desc below). Consumed elsewhere in this file when deciding
// how to lay out the D-register save area.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));
167
170 unsigned NumAlignedDPRCS2Regs);
171
181
182/// Get the spill area that Reg should be saved into in the prologue.
185 unsigned NumAlignedDPRCS2Regs,
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {r8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {r8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {r8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRSC1
203 // push {r11, lr} GPRCS2
204 // vpush {r8-d15} DPRCS1
205
 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
312
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
454 .addImm(/*Wide=*/1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MBBI->getOperand(0));
464 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
468 MBB->erase(MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
477 .addImm(/*Wide=*/0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
490 .addImm(/*Wide=*/1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(MBBI, MIB);
493 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
494 .addImm(/*Wide=*/1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(0).getReg() == ARM::SP &&
500 MBBI->getOperand(2).getReg() == ARM::SP &&
501 MBBI->getOperand(3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
503 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
504 .addImm(1ULL << Reg)
505 .addImm(/*Wide=*/1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(1).getReg() == ARM::SP &&
514 MBBI->getOperand(2).getReg() == ARM::SP &&
515 MBBI->getOperand(3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
517 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
518 .addImm(1ULL << Reg)
519 .addImm(/*Wide=*/1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
564 MBB->erase(MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
570 .addImm(Mask)
571 .addImm(Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
585 .addImm(First)
586 .addImm(Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
593 .addImm(MBBI->getOperand(2).getImm() * 4)
594 .addImm(/*Wide=*/0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
602 .addImm(MBBI->getOperand(2).getImm())
603 .addImm(/*Wide=*/1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
611 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
612 .addImm(Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
617 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
618 .addImm(Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error("No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::t2BXAUT_RET:
627 case ARM::TCRETURNri:
628 case ARM::TCRETURNrinotr12:
629 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
630 .addImm(/*Wide=*/0)
631 .setMIFlags(Flags);
632 break;
633
634 case ARM::TCRETURNdi:
635 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
636 .addImm(/*Wide=*/1)
637 .setMIFlags(Flags);
638 break;
639 }
640 return MBB->insertAfter(MBBI, MIB);
641}
642
645 if (MBBI == MBB.begin())
647 return std::prev(MBBI);
648}
649
653 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
654 if (Start.isValid())
655 Start = std::next(Start);
656 else
657 Start = MBB.begin();
658
659 for (auto MI = Start; MI != End;) {
660 auto Next = std::next(MI);
661 // Check if this instruction already has got a SEH opcode added. In that
662 // case, don't do this generic mapping.
663 if (Next != End && isSEHInstruction(*Next)) {
664 MI = std::next(Next);
665 while (MI != End && isSEHInstruction(*MI))
666 ++MI;
667 continue;
668 }
669 insertSEH(MI, TII, MIFlags);
670 MI = Next;
671 }
672}
673
676 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
677 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
678 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
679 if (isARM)
680 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
681 Pred, PredReg, TII, MIFlags);
682 else
683 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
684 Pred, PredReg, TII, MIFlags);
685}
686
687static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
689 const ARMBaseInstrInfo &TII, int NumBytes,
690 unsigned MIFlags = MachineInstr::NoFlags,
692 unsigned PredReg = 0) {
693 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
694 MIFlags, Pred, PredReg);
695}
696
698 int RegSize;
699 switch (MI.getOpcode()) {
700 case ARM::VSTMDDB_UPD:
701 RegSize = 8;
702 break;
703 case ARM::STMDB_UPD:
704 case ARM::t2STMDB_UPD:
705 RegSize = 4;
706 break;
707 case ARM::t2STR_PRE:
708 case ARM::STR_PRE_IMM:
709 return 4;
710 default:
711 llvm_unreachable("Unknown push or pop like instruction");
712 }
713
714 int count = 0;
715 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
716 // pred) so the list starts at 4.
717 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
718 count += RegSize;
719 return count;
720}
721
723 size_t StackSizeInBytes) {
724 const MachineFrameInfo &MFI = MF.getFrameInfo();
725 const Function &F = MF.getFunction();
726 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
727
728 StackProbeSize =
729 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
730 return (StackSizeInBytes >= StackProbeSize) &&
731 !F.hasFnAttribute("no-stack-arg-probe");
732}
733
734namespace {
735
736struct StackAdjustingInsts {
737 struct InstInfo {
739 unsigned SPAdjust;
740 bool BeforeFPSet;
741
742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
743 void dump() {
744 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
745 << "sp-adjust=" << SPAdjust;
746 I->dump();
747 }
748#endif
749 };
750
752
753 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
754 bool BeforeFPSet = false) {
755 InstInfo Info = {I, SPAdjust, BeforeFPSet};
756 Insts.push_back(Info);
757 }
758
759 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
760 auto Info =
761 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
762 assert(Info != Insts.end() && "invalid sp adjusting instruction");
763 Info->SPAdjust += ExtraBytes;
764 }
765
766 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
767 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
768 unsigned CFAOffset = 0;
769 for (auto &Info : Insts) {
770 if (HasFP && !Info.BeforeFPSet)
771 return;
772
773 CFAOffset += Info.SPAdjust;
774 CFIBuilder.setInsertPoint(std::next(Info.I));
775 CFIBuilder.buildDefCFAOffset(CFAOffset);
776 }
777 }
778
779#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
780 void dump() {
781 dbgs() << "StackAdjustingInsts:\n";
782 for (auto &Info : Insts)
783 Info.dump();
784 }
785#endif
786};
787
788} // end anonymous namespace
789
790/// Emit an instruction sequence that will align the address in
791/// register Reg by zero-ing out the lower bits. For versions of the
792/// architecture that support Neon, this must be done in a single
793/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
794/// single instruction. That function only gets called when optimizing
795/// spilling of D registers on a core with the Neon instruction set
796/// present.
798 const TargetInstrInfo &TII,
801 const DebugLoc &DL, const unsigned Reg,
802 const Align Alignment,
803 const bool MustBeSingleInstruction) {
804 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
805 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
806 const unsigned AlignMask = Alignment.value() - 1U;
807 const unsigned NrBitsToZero = Log2(Alignment);
808 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
809 if (!AFI->isThumbFunction()) {
810 // if the BFC instruction is available, use that to zero the lower
811 // bits:
812 // bfc Reg, #0, log2(Alignment)
813 // otherwise use BIC, if the mask to zero the required number of bits
814 // can be encoded in the bic immediate field
815 // bic Reg, Reg, Alignment-1
816 // otherwise, emit
817 // lsr Reg, Reg, log2(Alignment)
818 // lsl Reg, Reg, log2(Alignment)
819 if (CanUseBFC) {
820 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
822 .addImm(~AlignMask)
824 } else if (AlignMask <= 255) {
825 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
827 .addImm(AlignMask)
829 .add(condCodeOp());
830 } else {
831 assert(!MustBeSingleInstruction &&
832 "Shouldn't call emitAligningInstructions demanding a single "
833 "instruction to be emitted for large stack alignment for a target "
834 "without BFC.");
835 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
837 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
839 .add(condCodeOp());
840 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
842 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
844 .add(condCodeOp());
845 }
846 } else {
847 // Since this is only reached for Thumb-2 targets, the BFC instruction
848 // should always be available.
849 assert(CanUseBFC);
850 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
852 .addImm(~AlignMask)
854 }
855}
856
/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
/// \returns a (negative) lower bound on the FP spill slot's offset from the
/// incoming SP, i.e. the most bytes that could possibly sit above the FP save.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  // NOTE(review): the source listing this was taken from dropped the
  // declaration of PushPopSplit here — presumably
  // STI.getPushPopSplitVariation(MF); confirm against upstream before relying
  // on this copy.
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack first.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimation: Assume the frame pointer being r7 and
  // pc("r15") up to r8 getting spilled before (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
    // Here, r11 can be stored below all of r4-r15.
    MaxRegBytes = 11 * 4;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
    // Here, r11 can be stored below all of r4-r15 plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  // An FPCXT save slot only exists for v8.1-M Mainline CMSE non-secure entry
  // functions (4 bytes at the very top of the frame).
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}
885
887 MachineBasicBlock &MBB) const {
889 MachineFrameInfo &MFI = MF.getFrameInfo();
891 const TargetMachine &TM = MF.getTarget();
892 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
893 const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
895 "This emitPrologue does not support Thumb1!");
896 bool isARM = !AFI->isThumbFunction();
897 Align Alignment = STI.getFrameLowering()->getStackAlign();
898 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
899 unsigned NumBytes = MFI.getStackSize();
900 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
901 int FPCXTSaveSize = 0;
902 bool NeedsWinCFI = needsWinCFI(MF);
904 STI.getPushPopSplitVariation(MF);
905
906 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
907
908 // Debug location must be unknown since the first debug location is used
909 // to determine the end of the prologue.
910 DebugLoc dl;
911
912 Register FramePtr = RegInfo->getFrameRegister(MF);
913
914 // Determine the sizes of each callee-save spill areas and record which frame
915 // belongs to which callee-save spill areas.
916 unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
917 DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
918 int FramePtrSpillFI = 0;
919 int D8SpillFI = 0;
920
921 // All calls are tail calls in GHC calling conv, and functions have no
922 // prologue/epilogue.
924 return;
925
926 StackAdjustingInsts DefCFAOffsetCandidates;
927 bool HasFP = hasFP(MF);
928
929 if (!AFI->hasStackFrame() &&
930 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
931 if (NumBytes != 0) {
932 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
934 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
935 }
936 if (!NeedsWinCFI)
937 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
938 if (NeedsWinCFI && MBBI != MBB.begin()) {
940 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
942 MF.setHasWinCFI(true);
943 }
944 return;
945 }
946
947 // Determine spill area sizes, and some important frame indices.
948 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
949 bool BeforeFPPush = true;
950 for (const CalleeSavedInfo &I : CSI) {
951 MCRegister Reg = I.getReg();
952 int FI = I.getFrameIdx();
953
954 SpillArea Area = getSpillArea(Reg, PushPopSplit,
955 AFI->getNumAlignedDPRCS2Regs(), RegInfo);
956
957 if (Reg == FramePtr.asMCReg()) {
958 FramePtrSpillFI = FI;
959 FramePtrSpillArea = Area;
960 }
961 if (Reg == ARM::D8)
962 D8SpillFI = FI;
963
964 switch (Area) {
965 case SpillArea::FPCXT:
966 FPCXTSaveSize += 4;
967 break;
969 GPRCS1Size += 4;
970 break;
972 GPRCS2Size += 4;
973 break;
975 FPStatusSize += 4;
976 break;
978 DPRCS1Size += 8;
979 break;
981 GPRCS3Size += 4;
982 break;
984 DPRCS2Size += 8;
985 break;
986 }
987 }
988
989 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
990 DPRCS1Push, GPRCS3Push;
991
992 // Move past the PAC computation.
993 if (AFI->shouldSignReturnAddress())
994 LastPush = MBBI++;
995
996 // Move past FPCXT area.
997 if (FPCXTSaveSize > 0) {
998 LastPush = MBBI++;
999 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
1000 }
1001
1002 // Allocate the vararg register save area.
1003 if (ArgRegsSaveSize) {
1004 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
1006 LastPush = std::prev(MBBI);
1007 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
1008 }
1009
1010 // Move past area 1.
1011 if (GPRCS1Size > 0) {
1012 GPRCS1Push = LastPush = MBBI++;
1013 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1014 if (FramePtrSpillArea == SpillArea::GPRCS1)
1015 BeforeFPPush = false;
1016 }
1017
1018 // Determine starting offsets of spill areas. These offsets are all positive
1019 // offsets from the bottom of the lowest-addressed callee-save area
 // (excluding DPRCS2, which is the re-aligned stack region) to the bottom
1021 // of the spill area in question.
1022 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1023 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1024 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1025 unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
1026
1027 Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
1028 unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1029 GPRCS2Size + FPStatusSize) %
1030 DPRAlign.value();
1031
1032 unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
1033
1034 if (HasFP) {
1035 // Offset from the CFA to the saved frame pointer, will be negative.
1036 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
1037 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1038 << ", FPOffset: " << FPOffset << "\n");
1039 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1040 "Max FP estimation is wrong");
1041 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
1042 NumBytes);
1043 }
1044 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1045 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1046 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1047
1048 // Move past area 2.
1049 if (GPRCS2Size > 0) {
1051 GPRCS2Push = LastPush = MBBI++;
1052 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1053 if (FramePtrSpillArea == SpillArea::GPRCS2)
1054 BeforeFPPush = false;
1055 }
1056
1057 // Move past FP status save area.
1058 if (FPStatusSize > 0) {
1059 while (MBBI != MBB.end()) {
1060 unsigned Opc = MBBI->getOpcode();
1061 if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
1062 MBBI++;
1063 else
1064 break;
1065 }
1066 LastPush = MBBI++;
1067 DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize);
1068 }
1069
1070 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1071 // .cfi_offset operations will reflect that.
1072 if (DPRGapSize) {
1073 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
1074 if (LastPush != MBB.end() &&
1075 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
1076 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
1077 else {
1078 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1080 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
1081 }
1082 }
1083
1084 // Move past DPRCS1Size.
1085 if (DPRCS1Size > 0) {
1086 // Since vpush register list cannot have gaps, there may be multiple vpush
1087 // instructions in the prologue.
1088 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1089 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1090 BeforeFPPush);
1091 DPRCS1Push = LastPush = MBBI++;
1092 }
1093 }
1094
1095 // Move past the aligned DPRCS2 area.
1096 if (DPRCS2Size > 0) {
1098 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1099 // leaves the stack pointer pointing to the DPRCS2 area.
1100 //
1101 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1102 NumBytes += MFI.getObjectOffset(D8SpillFI);
1103 } else
1104 NumBytes = DPRCS1Offset;
1105
1106 // Move GPRCS3, if using SplitR11WindowsSEH.
1107 if (GPRCS3Size > 0) {
1109 GPRCS3Push = LastPush = MBBI++;
1110 DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
1111 if (FramePtrSpillArea == SpillArea::GPRCS3)
1112 BeforeFPPush = false;
1113 }
1114
1115 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1116 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1117 NeedsWinCFIStackAlloc = false;
1118
1119 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
1120 uint32_t NumWords = NumBytes >> 2;
1121
1122 if (NumWords < 65536) {
1123 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1124 .addImm(NumWords)
1127 } else {
1128 // Split into two instructions here, instead of using t2MOVi32imm,
1129 // to allow inserting accurate SEH instructions (including accurate
1130 // instruction size for each of them).
1131 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1132 .addImm(NumWords & 0xffff)
1135 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
1136 .addReg(ARM::R4)
1137 .addImm(NumWords >> 16)
1140 }
1141
1142 const ARMTargetLowering *TLI = STI.getTargetLowering();
1143 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1144 if (ChkStkLibcall == RTLIB::Unsupported)
1145 reportFatalUsageError("no available implementation of __chkstk");
1146 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1147
1148 switch (TM.getCodeModel()) {
1149 case CodeModel::Tiny:
1150 llvm_unreachable("Tiny code model not available on ARM.");
1151 case CodeModel::Small:
1152 case CodeModel::Medium:
1153 case CodeModel::Kernel:
1154 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1156 .addExternalSymbol(ChkStk)
1157 .addReg(ARM::R4, RegState::Implicit)
1159 break;
1160 case CodeModel::Large:
1161 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1162 .addExternalSymbol(ChkStk)
1164
1165 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1167 .addReg(ARM::R12, RegState::Kill)
1168 .addReg(ARM::R4, RegState::Implicit)
1170 break;
1171 }
1172
1173 MachineInstrBuilder Instr, SEH;
1174 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1175 .addReg(ARM::SP, RegState::Kill)
1176 .addReg(ARM::R4, RegState::Kill)
1179 .add(condCodeOp());
1180 if (NeedsWinCFIStackAlloc) {
1181 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1182 .addImm(NumBytes)
1183 .addImm(/*Wide=*/1)
1185 MBB.insertAfter(Instr, SEH);
1186 }
1187 NumBytes = 0;
1188 }
1189
1190 if (NumBytes) {
1191 // Adjust SP after all the callee-save spills.
1192 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1193 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1194 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1195 else {
1196 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1198 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1199 }
1200
1201 if (HasFP && isARM)
1202 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1203 // Note it's not safe to do this in Thumb2 mode because it would have
1204 // taken two instructions:
1205 // mov sp, r7
1206 // sub sp, #24
1207 // If an interrupt is taken between the two instructions, then sp is in
1208 // an inconsistent state (pointing to the middle of callee-saved area).
1209 // The interrupt handler can end up clobbering the registers.
1210 AFI->setShouldRestoreSPFromFP(true);
1211 }
1212
1213 // Set FP to point to the stack slot that contains the previous FP.
1214 // For iOS, FP is R7, which has now been stored in spill area 1.
1215 // Otherwise, if this is not iOS, all the callee-saved registers go
1216 // into spill area 1, including the FP in R11. In either case, it
1217 // is in area one and the adjustment needs to take place just after
1218 // that push.
1220 if (HasFP) {
1221 MachineBasicBlock::iterator FPPushInst;
1222 // Offset from SP immediately after the push which saved the FP to the FP
1223 // save slot.
1224 int64_t FPOffsetAfterPush;
1225 switch (FramePtrSpillArea) {
1226 case SpillArea::GPRCS1:
1227 FPPushInst = GPRCS1Push;
1228 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1229 ArgRegsSaveSize + FPCXTSaveSize +
1230 sizeOfSPAdjustment(*FPPushInst);
1231 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1232 << FPOffsetAfterPush << " after that push\n");
1233 break;
1234 case SpillArea::GPRCS2:
1235 FPPushInst = GPRCS2Push;
1236 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1237 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1238 sizeOfSPAdjustment(*FPPushInst);
1239 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1240 << FPOffsetAfterPush << " after that push\n");
1241 break;
1242 case SpillArea::GPRCS3:
1243 FPPushInst = GPRCS3Push;
1244 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1245 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1246 FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
1247 sizeOfSPAdjustment(*FPPushInst);
1248 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1249 << FPOffsetAfterPush << " after that push\n");
1250 break;
1251 default:
1252 llvm_unreachable("frame pointer in unknown spill area");
1253 break;
1254 }
1255 AfterPush = std::next(FPPushInst);
1256 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1257 assert(FPOffsetAfterPush == 0);
1258
1259 // Emit the MOV or ADD to set up the frame pointer register.
1260 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1261 FramePtr, ARM::SP, FPOffsetAfterPush,
1263
1264 if (!NeedsWinCFI) {
1265 // Emit DWARF info to find the CFA using the frame pointer from this
1266 // point onward.
1267 CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
1268 if (FPOffsetAfterPush != 0)
1269 CFIBuilder.buildDefCFA(FramePtr, -MFI.getObjectOffset(FramePtrSpillFI));
1270 else
1271 CFIBuilder.buildDefCFARegister(FramePtr);
1272 }
1273 }
1274
1275 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1276 // instructions below don't need to be replayed to unwind the stack.
1277 if (NeedsWinCFI && MBBI != MBB.begin()) {
1279 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1280 End = AfterPush;
1282 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1284 MF.setHasWinCFI(true);
1285 }
1286
1287 // Now that the prologue's actual instructions are finalised, we can insert
1288 // the necessary DWARF cf instructions to describe the situation. Start by
1289 // recording where each register ended up:
1290 if (!NeedsWinCFI) {
1291 for (const auto &Entry : reverse(CSI)) {
1292 MCRegister Reg = Entry.getReg();
1293 int FI = Entry.getFrameIdx();
1295 switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
1296 RegInfo)) {
1297 case SpillArea::GPRCS1:
1298 CFIPos = std::next(GPRCS1Push);
1299 break;
1300 case SpillArea::GPRCS2:
1301 CFIPos = std::next(GPRCS2Push);
1302 break;
1303 case SpillArea::DPRCS1:
1304 CFIPos = std::next(DPRCS1Push);
1305 break;
1306 case SpillArea::GPRCS3:
1307 CFIPos = std::next(GPRCS3Push);
1308 break;
1310 case SpillArea::FPCXT:
1311 case SpillArea::DPRCS2:
1312 // FPCXT and DPRCS2 are not represented in the DWARF info.
1313 break;
1314 }
1315
1316 if (CFIPos.isValid()) {
1318 .buildOffset(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1319 MFI.getObjectOffset(FI));
1320 }
1321 }
1322 }
1323
1324 // Now we can emit descriptions of where the canonical frame address was
1325 // throughout the process. If we have a frame pointer, it takes over the job
1326 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1327 // actually get emitted.
1328 if (!NeedsWinCFI) {
1329 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1330 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
1331 }
1332
1333 if (STI.isTargetELF() && hasFP(MF))
1335 AFI->getFramePtrSpillOffset());
1336
1337 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1338 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1339 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1340 AFI->setFPStatusSavesSize(FPStatusSize);
1341 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1342 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1343 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1344
1345 // If we need dynamic stack realignment, do it here. Be paranoid and make
1346 // sure if we also have VLAs, we have a base pointer for frame access.
1347 // If aligned NEON registers were spilled, the stack has already been
1348 // realigned.
1349 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1350 Align MaxAlign = MFI.getMaxAlign();
1352 if (!AFI->isThumbFunction()) {
1353 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1354 false);
1355 } else {
1356 // We cannot use sp as source/dest register here, thus we're using r4 to
1357 // perform the calculations. We're emitting the following sequence:
1358 // mov r4, sp
1359 // -- use emitAligningInstructions to produce best sequence to zero
1360 // -- out lower bits in r4
1361 // mov sp, r4
1362 // FIXME: It will be better just to find spare register here.
1363 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1364 .addReg(ARM::SP, RegState::Kill)
1366 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1367 false);
1368 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1369 .addReg(ARM::R4, RegState::Kill)
1371 }
1372
1373 AFI->setShouldRestoreSPFromFP(true);
1374 }
1375
1376 // If we need a base pointer, set it up here. It's whatever the value
1377 // of the stack pointer is at this point. Any variable size objects
1378 // will be allocated after this, so we can still use the base pointer
1379 // to reference locals.
1380 // FIXME: Clarify FrameSetup flags here.
1381 if (RegInfo->hasBasePointer(MF)) {
1382 if (isARM)
1383 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1384 .addReg(ARM::SP)
1386 .add(condCodeOp());
1387 else
1388 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1389 .addReg(ARM::SP)
1391 }
1392
1393 // If the frame has variable sized objects then the epilogue must restore
1394 // the sp from fp. We can assume there's an FP here since hasFP already
1395 // checks for hasVarSizedObjects.
1396 if (MFI.hasVarSizedObjects())
1397 AFI->setShouldRestoreSPFromFP(true);
1398}
1399
1401 MachineBasicBlock &MBB) const {
1402 MachineFrameInfo &MFI = MF.getFrameInfo();
1404 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1405 const ARMBaseInstrInfo &TII =
1406 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1407 assert(!AFI->isThumb1OnlyFunction() &&
1408 "This emitEpilogue does not support Thumb1!");
1409 bool isARM = !AFI->isThumbFunction();
1411 STI.getPushPopSplitVariation(MF);
1412
1413 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1414
1415 // Amount of stack space we reserved next to incoming args for either
1416 // varargs registers or stack arguments in tail calls made by this function.
1417 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1418
1419 // How much of the stack used by incoming arguments this function is expected
1420 // to restore in this particular epilogue.
1421 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1422 int NumBytes = (int)MFI.getStackSize();
1423 Register FramePtr = RegInfo->getFrameRegister(MF);
1424
1425 // All calls are tail calls in GHC calling conv, and functions have no
1426 // prologue/epilogue.
1428 return;
1429
1430 // First put ourselves on the first (from top) terminator instructions.
1431 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1432 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1433
1434 MachineBasicBlock::iterator RangeStart;
1435 if (!AFI->hasStackFrame()) {
1436 if (MF.hasWinCFI()) {
1437 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1439 RangeStart = initMBBRange(MBB, MBBI);
1440 }
1441
1442 if (NumBytes + IncomingArgStackToRestore != 0)
1443 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1444 NumBytes + IncomingArgStackToRestore,
1446 } else {
1447 // Unwind MBBI to point to first LDR / VLDRD.
1448 if (MBBI != MBB.begin()) {
1449 do {
1450 --MBBI;
1451 } while (MBBI != MBB.begin() &&
1453 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1454 ++MBBI;
1455 }
1456
1457 if (MF.hasWinCFI()) {
1458 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1460 RangeStart = initMBBRange(MBB, MBBI);
1461 }
1462
1463 // Move SP to start of FP callee save spill area.
1464 NumBytes -=
1465 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1469
1470 // Reset SP based on frame pointer only if the stack frame extends beyond
1471 // frame pointer stack slot or target is ELF and the function has FP.
1472 if (AFI->shouldRestoreSPFromFP()) {
1473 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1474 if (NumBytes) {
1475 if (isARM)
1476 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1477 ARMCC::AL, 0, TII,
1479 else {
1480 // It's not possible to restore SP from FP in a single instruction.
1481 // For iOS, this looks like:
1482 // mov sp, r7
1483 // sub sp, #24
1484 // This is bad, if an interrupt is taken after the mov, sp is in an
1485 // inconsistent state.
1486 // Use the first callee-saved register as a scratch register.
1487 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1488 "No scratch register to restore SP from FP!");
1489 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1491 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1492 .addReg(ARM::R4)
1495 }
1496 } else {
1497 // Thumb2 or ARM.
1498 if (isARM)
1499 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1502 .add(condCodeOp())
1504 else
1505 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1509 }
1510 } else if (NumBytes &&
1511 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1512 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1514
1515 // Increment past our save areas.
1516 if (AFI->getGPRCalleeSavedArea3Size()) {
1518 (void)PushPopSplit;
1519 MBBI++;
1520 }
1521
1522 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1523 MBBI++;
1524 // Since vpop register list cannot have gaps, there may be multiple vpop
1525 // instructions in the epilogue.
1526 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1527 MBBI++;
1528 }
1529 if (AFI->getDPRCalleeSavedGapSize()) {
1530 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1531 "unexpected DPR alignment gap");
1532 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1534 }
1535
1536 if (AFI->getGPRCalleeSavedArea2Size()) {
1538 (void)PushPopSplit;
1539 MBBI++;
1540 }
1541 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1542
1543 if (ReservedArgStack || IncomingArgStackToRestore) {
1544 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1545 "attempting to restore negative stack amount");
1546 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1547 ReservedArgStack + IncomingArgStackToRestore,
1549 }
1550
1551 // Validate PAC, It should have been already popped into R12. For CMSE entry
1552 // function, the validation instruction is emitted during expansion of the
1553 // tBXNS_RET, since the validation must use the value of SP at function
1554 // entry, before saving, resp. after restoring, FPCXTNS.
1555 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) {
1556 bool CanUseBXAut =
1557 STI.isThumb() && STI.hasV8_1MMainlineOps() && STI.hasPACBTI();
1558 auto TMBBI = MBB.getFirstTerminator();
1559 bool IsBXReturn =
1560 TMBBI != MBB.end() && TMBBI->getOpcode() == ARM::tBX_RET;
1561 if (IsBXReturn && CanUseBXAut)
1562 TMBBI->setDesc(STI.getInstrInfo()->get(ARM::t2BXAUT_RET));
1563 else
1564 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1565 }
1566 }
1567
1568 if (MF.hasWinCFI()) {
1569 insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
1570 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1572 }
1573}
1574
1575/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1576/// debug info. It's the same as what we use for resolving the code-gen
1577/// references for now. FIXME: This can go wrong when references are
1578/// SP-relative and simple call frames aren't used.
1580 int FI,
1581 Register &FrameReg) const {
1582 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1583}
1584
1586 int FI, Register &FrameReg,
1587 int SPAdj) const {
1588 const MachineFrameInfo &MFI = MF.getFrameInfo();
1589 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1590 MF.getSubtarget().getRegisterInfo());
1591 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1592 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1593 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1594 bool isFixed = MFI.isFixedObjectIndex(FI);
1595
1596 FrameReg = ARM::SP;
1597 Offset += SPAdj;
1598
1599 // SP can move around if there are allocas. We may also lose track of SP
1600 // when emergency spilling inside a non-reserved call frame setup.
1601 bool hasMovingSP = !hasReservedCallFrame(MF);
1602
1603 // When dynamically realigning the stack, use the frame pointer for
1604 // parameters, and the stack/base pointer for locals.
1605 if (RegInfo->hasStackRealignment(MF)) {
1606 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1607 if (isFixed) {
1608 FrameReg = RegInfo->getFrameRegister(MF);
1609 Offset = FPOffset;
1610 } else if (hasMovingSP) {
1611 assert(RegInfo->hasBasePointer(MF) &&
1612 "VLAs and dynamic stack alignment, but missing base pointer!");
1613 FrameReg = RegInfo->getBaseRegister();
1614 Offset -= SPAdj;
1615 }
1616 return Offset;
1617 }
1618
1619 // If there is a frame pointer, use it when we can.
1620 if (hasFP(MF) && AFI->hasStackFrame()) {
1621 // Use frame pointer to reference fixed objects. Use it for locals if
1622 // there are VLAs (and thus the SP isn't reliable as a base).
1623 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1624 FrameReg = RegInfo->getFrameRegister(MF);
1625 return FPOffset;
1626 } else if (hasMovingSP) {
1627 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1628 if (AFI->isThumb2Function()) {
1629 // Try to use the frame pointer if we can, else use the base pointer
1630 // since it's available. This is handy for the emergency spill slot, in
1631 // particular.
1632 if (FPOffset >= -255 && FPOffset < 0) {
1633 FrameReg = RegInfo->getFrameRegister(MF);
1634 return FPOffset;
1635 }
1636 }
1637 } else if (AFI->isThumbFunction()) {
1638 // Prefer SP to base pointer, if the offset is suitably aligned and in
1639 // range as the effective range of the immediate offset is bigger when
1640 // basing off SP.
1641 // Use add <rd>, sp, #<imm8>
1642 // ldr <rd>, [sp, #<imm8>]
1643 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1644 return Offset;
1645 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1646 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1647 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1648 FrameReg = RegInfo->getFrameRegister(MF);
1649 return FPOffset;
1650 }
1651 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1652 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1653 FrameReg = RegInfo->getFrameRegister(MF);
1654 return FPOffset;
1655 }
1656 }
1657 // Use the base pointer if we have one.
1658 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1659 // That can happen if we forced a base pointer for a large call frame.
1660 if (RegInfo->hasBasePointer(MF)) {
1661 FrameReg = RegInfo->getBaseRegister();
1662 Offset -= SPAdj;
1663 }
1664 return Offset;
1665}
1666
1667void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1670 unsigned StmOpc, unsigned StrOpc,
1671 bool NoGap,
1672 function_ref<bool(unsigned)> Func) const {
1673 MachineFunction &MF = *MBB.getParent();
1676
1677 DebugLoc DL;
1678
1679 using RegAndKill = std::pair<unsigned, bool>;
1680
1682 unsigned i = CSI.size();
1683 while (i != 0) {
1684 unsigned LastReg = 0;
1685 for (; i != 0; --i) {
1686 MCRegister Reg = CSI[i-1].getReg();
1687 if (!Func(Reg))
1688 continue;
1689
1690 const MachineRegisterInfo &MRI = MF.getRegInfo();
1691 bool isLiveIn = MRI.isLiveIn(Reg);
1692 if (!isLiveIn && !MRI.isReserved(Reg))
1693 MBB.addLiveIn(Reg);
1694 // If NoGap is true, push consecutive registers and then leave the rest
1695 // for other instructions. e.g.
1696 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1697 if (NoGap && LastReg && LastReg != Reg-1)
1698 break;
1699 LastReg = Reg;
1700 // Do not set a kill flag on values that are also marked as live-in. This
1701 // happens with the @llvm-returnaddress intrinsic and with arguments
1702 // passed in callee saved registers.
1703 // Omitting the kill flags is conservatively correct even if the live-in
1704 // is not used after all.
1705 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1706 }
1707
1708 if (Regs.empty())
1709 continue;
1710
1711 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1712 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1713 });
1714
1715 if (Regs.size() > 1 || StrOpc== 0) {
1716 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1717 .addReg(ARM::SP)
1720 for (const auto &[Reg, Kill] : Regs)
1722 } else if (Regs.size() == 1) {
1723 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1724 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1725 .addReg(ARM::SP)
1727 .addImm(-4)
1729 }
1730 Regs.clear();
1731
1732 // Put any subsequent vpush instructions before this one: they will refer to
1733 // higher register numbers so need to be pushed first in order to preserve
1734 // monotonicity.
1735 if (MI != MBB.begin())
1736 --MI;
1737 }
1738}
1739
1740void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1743 unsigned LdmOpc, unsigned LdrOpc,
1744 bool isVarArg, bool NoGap,
1745 function_ref<bool(unsigned)> Func) const {
1746 MachineFunction &MF = *MBB.getParent();
1747 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1748 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1749 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1750 bool hasPAC = AFI->shouldSignReturnAddress();
1751 DebugLoc DL;
1752 bool isTailCall = false;
1753 bool isInterrupt = false;
1754 bool isTrap = false;
1755 bool isCmseEntry = false;
1757 STI.getPushPopSplitVariation(MF);
1758 if (MBB.end() != MI) {
1759 DL = MI->getDebugLoc();
1760 unsigned RetOpcode = MI->getOpcode();
1761 isTailCall =
1762 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1763 RetOpcode == ARM::TCRETURNrinotr12);
1764 isInterrupt =
1765 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1766 isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
1767 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1768 }
1769
1770 SmallVector<unsigned, 4> Regs;
1771 unsigned i = CSI.size();
1772 while (i != 0) {
1773 unsigned LastReg = 0;
1774 bool DeleteRet = false;
1775 for (; i != 0; --i) {
1776 CalleeSavedInfo &Info = CSI[i-1];
1777 MCRegister Reg = Info.getReg();
1778 if (!Func(Reg))
1779 continue;
1780
1781 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1782 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1783 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1784 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1785 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1786 Reg = ARM::PC;
1787 // Fold the return instruction into the LDM.
1788 DeleteRet = true;
1789 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1790 }
1791
1792 // If NoGap is true, pop consecutive registers and then leave the rest
1793 // for other instructions. e.g.
1794 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1795 if (NoGap && LastReg && LastReg != Reg-1)
1796 break;
1797
1798 LastReg = Reg;
1799 Regs.push_back(Reg);
1800 }
1801
1802 if (Regs.empty())
1803 continue;
1804
1805 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1806 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1807 });
1808
1809 if (Regs.size() > 1 || LdrOpc == 0) {
1810 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1811 .addReg(ARM::SP)
1814 for (unsigned Reg : Regs)
1815 MIB.addReg(Reg, getDefRegState(true));
1816 if (DeleteRet) {
1817 if (MI != MBB.end()) {
1818 MIB.copyImplicitOps(*MI);
1819 MI->eraseFromParent();
1820 }
1821 }
1822 MI = MIB;
1823 } else if (Regs.size() == 1) {
1824 // If we adjusted the reg to PC from LR above, switch it back here. We
1825 // only do that for LDM.
1826 if (Regs[0] == ARM::PC)
1827 Regs[0] = ARM::LR;
1828 MachineInstrBuilder MIB =
1829 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1830 .addReg(ARM::SP, RegState::Define)
1831 .addReg(ARM::SP)
1833 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1834 // that refactoring is complete (eventually).
1835 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1836 MIB.addReg(0);
1838 } else
1839 MIB.addImm(4);
1840 MIB.add(predOps(ARMCC::AL));
1841 }
1842 Regs.clear();
1843
1844 // Put any subsequent vpop instructions after this one: they will refer to
1845 // higher register numbers so need to be popped afterwards.
1846 if (MI != MBB.end())
1847 ++MI;
1848 }
1849}
1850
1851void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1854 unsigned PushOpc) const {
1855 MachineFunction &MF = *MBB.getParent();
1856 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1857
1859 auto RegPresent = [&CSI](MCRegister Reg) {
1860 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1861 return C.getReg() == Reg;
1862 });
1863 };
1864
1865 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1866 // instruction.
1867 if (RegPresent(ARM::FPSCR)) {
1868 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4)
1871
1872 Regs.push_back(ARM::R4);
1873 }
1874
1875 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1876 // VMRS_FPEXC instruction.
1877 if (RegPresent(ARM::FPEXC)) {
1878 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5)
1881
1882 Regs.push_back(ARM::R5);
1883 }
1884
1885 // If neither FPSCR and FPEXC are present, then do nothing.
1886 if (Regs.size() == 0)
1887 return;
1888
1889 // Push both R4 and R5 onto the stack, if present.
1890 MachineInstrBuilder MIB =
1891 BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP)
1892 .addReg(ARM::SP)
1895
1896 for (Register Reg : Regs) {
1897 MIB.addReg(Reg);
1898 }
1899}
1900
1901void ARMFrameLowering::emitFPStatusRestores(
1903 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1904 MachineFunction &MF = *MBB.getParent();
1905 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1906
1907 auto RegPresent = [&CSI](MCRegister Reg) {
1908 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1909 return C.getReg() == Reg;
1910 });
1911 };
1912
1913 // Do nothing if we don't need to restore any FP status registers.
1914 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1915 return;
1916
1917 // Pop registers off of the stack.
1918 MachineInstrBuilder MIB =
1919 BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP)
1920 .addReg(ARM::SP)
1923
1924 // If FPSCR was saved, it will be popped into R4.
1925 if (RegPresent(ARM::FPSCR)) {
1926 MIB.addReg(ARM::R4, RegState::Define);
1927 }
1928
1929 // If FPEXC was saved, it will be popped into R5.
1930 if (RegPresent(ARM::FPEXC)) {
1931 MIB.addReg(ARM::R5, RegState::Define);
1932 }
1933
1934 // Move the FPSCR value back into the register with the VMSR instruction.
1935 if (RegPresent(ARM::FPSCR)) {
1936 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR))
1937 .addReg(ARM::R4)
1940 }
1941
1942 // Move the FPEXC value back into the register with the VMSR_FPEXC
1943 // instruction.
1944 if (RegPresent(ARM::FPEXC)) {
1945 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC))
1946 .addReg(ARM::R5)
1949 }
1950}
1951
1952/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1953/// starting from d8. Also insert stack realignment code and leave the stack
1954/// pointer pointing to the d8 spill slot.
// NOTE(review): this is a doxygen source dump with extraction gaps. Original
// lines 1955-1956 (the leading part of the function signature) and several
// builder-operand lines (orig 2005, 2023, 2040-2042, 2060-2061, 2075, 2088 —
// presumably predicate/trailing operands of the BuildMI chains) are missing;
// the numbered lines below are reproduced verbatim. Restore from upstream
// LLVM before compiling.
1957                                    unsigned NumAlignedDPRCS2Regs,
1959                                    const TargetRegisterInfo *TRI) {
1960  MachineFunction &MF = *MBB.getParent();
1962  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1964  MachineFrameInfo &MFI = MF.getFrameInfo();
1965
1966  // Mark the D-register spill slots as properly aligned. Since MFI computes
1967  // stack slot layout backwards, this can actually mean that the d-reg stack
1968  // slot offsets can be wrong. The offset for d8 will always be correct.
1969  for (const CalleeSavedInfo &I : CSI) {
    // NOTE(review): for callee-saved registers below d8 this subtraction
    // wraps around to a huge unsigned value, so the comparison below also
    // filters those registers out — the wraparound is intentional.
1970    unsigned DNum = I.getReg() - ARM::D8;
1971    if (DNum > NumAlignedDPRCS2Regs - 1)
1972      continue;
1973    int FI = I.getFrameIdx();
1974    // The even-numbered registers will be 16-byte aligned, the odd-numbered
1975    // registers will be 8-byte aligned.
1976    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1977
1978    // The stack slot for D8 needs to be maximally aligned because this is
1979    // actually the point where we align the stack pointer. MachineFrameInfo
1980    // computes all offsets relative to the incoming stack pointer which is a
1981    // bit weird when realigning the stack. Any extra padding for this
1982    // over-alignment is not realized because the code inserted below adjusts
1983    // the stack pointer by numregs * 8 before aligning the stack pointer.
1984    if (DNum == 0)
1985      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1986  }
1987
1988  // Move the stack pointer to the d8 spill slot, and align it at the same
1989  // time. Leave the stack slot address in the scratch register r4.
1990  //
1991  //   sub r4, sp, #numregs * 8
1992  //   bic r4, r4, #align - 1
1993  //   mov sp, r4
1994  //
1995  bool isThumb = AFI->isThumbFunction();
1996  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1997  AFI->setShouldRestoreSPFromFP(true);
1998
1999  // sub r4, sp, #numregs * 8
2000  // The immediate is <= 64, so it doesn't need any special encoding.
2001  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
2002  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2003      .addReg(ARM::SP)
2004      .addImm(8 * NumAlignedDPRCS2Regs)
2006      .add(condCodeOp());
2007
2008  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
2009  // We must set parameter MustBeSingleInstruction to true, since
2010  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
2011  // stack alignment. Luckily, this can always be done since all ARM
2012  // architecture versions that support Neon also support the BFC
2013  // instruction.
2014  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
2015
2016  // mov sp, r4
2017  // The stack pointer must be adjusted before spilling anything, otherwise
2018  // the stack slots could be clobbered by an interrupt handler.
2019  // Leave r4 live, it is used below.
2020  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
2021  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
2022                                .addReg(ARM::R4)
2024  if (!isThumb)
2025    MIB.add(condCodeOp());
2026
2027  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
2028  // r4 holds the stack slot address.
2029  unsigned NextReg = ARM::D8;
2030
2031  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
2032  // The writeback is only needed when emitting two vst1.64 instructions.
2033  if (NumAlignedDPRCS2Regs >= 6) {
2034    MCRegister SupReg =
2035        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2036    MBB.addLiveIn(SupReg);
2037    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
2038        .addReg(ARM::R4, RegState::Kill)
2039        .addImm(16)
2040        .addReg(NextReg)
2043    NextReg += 4;
2044    NumAlignedDPRCS2Regs -= 4;
2045  }
2046
2047  // We won't modify r4 beyond this point. It currently points to the next
2048  // register to be spilled.
2049  unsigned R4BaseReg = NextReg;
2050
2051  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
2052  if (NumAlignedDPRCS2Regs >= 4) {
2053    MCRegister SupReg =
2054        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2055    MBB.addLiveIn(SupReg);
2056    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
2057        .addReg(ARM::R4)
2058        .addImm(16)
2059        .addReg(NextReg)
2062    NextReg += 4;
2063    NumAlignedDPRCS2Regs -= 4;
2064  }
2065
2066  // 16-byte aligned vst1.64 with 2 d-regs.
2067  if (NumAlignedDPRCS2Regs >= 2) {
2068    MCRegister SupReg =
2069        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2070    MBB.addLiveIn(SupReg);
2071    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
2072        .addReg(ARM::R4)
2073        .addImm(16)
2074        .addReg(SupReg)
2076    NextReg += 2;
2077    NumAlignedDPRCS2Regs -= 2;
2078  }
2079
2080  // Finally, use a vanilla vstr.64 for the odd last register.
2081  if (NumAlignedDPRCS2Regs) {
2082    MBB.addLiveIn(NextReg);
2083    // vstr.64 uses addrmode5 which has an offset scale of 4.
    // (NextReg - R4BaseReg) counts d-registers already stored without
    // writeback, i.e. 8 bytes each; *2 converts that to addrmode5 units.
2084    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
2085        .addReg(NextReg)
2086        .addReg(ARM::R4)
2087        .addImm((NextReg - R4BaseReg) * 2)
2089  }
2090
2091  // The last spill instruction inserted should kill the scratch register r4.
2092  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2093}
2094
2095/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
2096/// iterator to the following instruction.
// NOTE(review): doxygen dump — original lines 2097-2098 (the leading part of
// the function signature) are missing from this extraction; the body below is
// contiguous and verbatim.
2099                        unsigned NumAlignedDPRCS2Regs) {
2100  // sub r4, sp, #numregs * 8
2101  // bic r4, r4, #align - 1
2102  // mov sp, r4
  // Skip the three fixed realignment instructions listed above; the iterator
  // then sits on the first spill emitted by emitAlignedDPRCS2Spills.
2103  ++MI; ++MI; ++MI;
2104  assert(MI->mayStore() && "Expecting spill instruction");
2105
2106  // These switches all fall through.
  // emitAlignedDPRCS2Spills emits 1 store for 1/2/4 regs, 2 stores for
  // 3/5/6 regs (the `default` label), and 3 stores for 7 regs — the
  // fall-through chain below advances past exactly that many stores.
2107  switch(NumAlignedDPRCS2Regs) {
2108  case 7:
2109    ++MI;
2110    assert(MI->mayStore() && "Expecting spill instruction");
2111    [[fallthrough]];
2112  default:
2113    ++MI;
2114    assert(MI->mayStore() && "Expecting spill instruction");
2115    [[fallthrough]];
2116  case 1:
2117  case 2:
2118  case 4:
2119    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2120    ++MI;
2121  }
2122  return MI;
2123}
2124
2125/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2126/// starting from d8. These instructions are assumed to execute while the
2127/// stack is still aligned, unlike the code inserted by emitPopInst.
// NOTE(review): doxygen dump with extraction gaps — original lines 2128-2129
// (start of the function signature) plus 2131, 2134, 2136, 2158, 2172-2173,
// 2189-2190, 2202 and 2212 (presumably register/predicate operands of the
// BuildMI chains) are missing; numbered lines are verbatim.
2130                                      unsigned NumAlignedDPRCS2Regs,
2132                                      const TargetRegisterInfo *TRI) {
2133  MachineFunction &MF = *MBB.getParent();
2135  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2137
2138  // Find the frame index assigned to d8.
2139  int D8SpillFI = 0;
2140  for (const CalleeSavedInfo &I : CSI)
2141    if (I.getReg() == ARM::D8) {
2142      D8SpillFI = I.getFrameIdx();
2143      break;
2144    }
2145
2146  // Materialize the address of the d8 spill slot into the scratch register r4.
2147  // This can be fairly complicated if the stack frame is large, so just use
2148  // the normal frame index elimination mechanism to do it. This code runs as
2149  // the initial part of the epilog where the stack and base pointers haven't
2150  // been changed yet.
2151  bool isThumb = AFI->isThumbFunction();
2152  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2153
2154  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2155  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2156      .addFrameIndex(D8SpillFI)
2157      .addImm(0)
2159      .add(condCodeOp());
2160
2161  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
2162  unsigned NextReg = ARM::D8;
2163
2164  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2165  if (NumAlignedDPRCS2Regs >= 6) {
2166    MCRegister SupReg =
2167        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2168    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
2169        .addReg(ARM::R4, RegState::Define)
2170        .addReg(ARM::R4, RegState::Kill)
2171        .addImm(16)
2174    NextReg += 4;
2175    NumAlignedDPRCS2Regs -= 4;
2176  }
2177
2178  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded (this mirrors the spill-side helper above, hence
  // the original wording).
2179  // register to be spilled.
2180  unsigned R4BaseReg = NextReg;
2181
2182  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2183  if (NumAlignedDPRCS2Regs >= 4) {
2184    MCRegister SupReg =
2185        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2186    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
2187        .addReg(ARM::R4)
2188        .addImm(16)
2191    NextReg += 4;
2192    NumAlignedDPRCS2Regs -= 4;
2193  }
2194
2195  // 16-byte aligned vld1.64 with 2 d-regs.
2196  if (NumAlignedDPRCS2Regs >= 2) {
2197    MCRegister SupReg =
2198        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2199    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
2200        .addReg(ARM::R4)
2201        .addImm(16)
2203    NextReg += 2;
2204    NumAlignedDPRCS2Regs -= 2;
2205  }
2206
2207  // Finally, use a vanilla vldr.64 for the remaining odd register.
2208  if (NumAlignedDPRCS2Regs)
2209    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
2210        .addReg(ARM::R4)
2211        .addImm(2 * (NextReg - R4BaseReg))
2213
2214  // The last reload instruction inserted above kills the scratch register r4.
2215  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2216}
2217
// NOTE(review): doxygen dump — original lines 2218-2220 (the
// ARMFrameLowering::spillCalleeSavedRegisters signature), 2225-2226, 2238 and
// 2248 (presumably predicate operands / local declarations) are missing from
// this extraction; the numbered lines below are verbatim.
// Spills all callee-saved registers listed in CSI, grouped by spill area:
// GPRCS1, GPRCS2, FP status regs, DPRCS1, GPRCS3, then the aligned DPRCS2
// registers via emitAlignedDPRCS2Spills. Returns true (spilling handled).
2221  if (CSI.empty())
2222    return false;
2223
2224  MachineFunction &MF = *MBB.getParent();
2227      STI.getPushPopSplitVariation(MF);
2228  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2229
2230  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2231  unsigned PushOneOpc = AFI->isThumbFunction() ?
2232    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2233  unsigned FltOpc = ARM::VSTMDDB_UPD;
2234  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2235  // Compute PAC in R12.
2236  if (AFI->shouldSignReturnAddress()) {
2237    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
2239  }
2240  // Save the non-secure floating point context.
2241  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2242        return C.getReg() == ARM::FPCXTNS;
2243      })) {
2244    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2245            ARM::SP)
2246        .addReg(ARM::SP)
2247        .addImm(-4)
2249  }
2250
  // Predicates classifying a register into its spill area, shared by the
  // emitPushInst calls below.
2251  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2252                       RegInfo](unsigned Reg, SpillArea TestArea) {
2253    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2254           TestArea;
2255  };
2256  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2257    return CheckRegArea(Reg, SpillArea::GPRCS1);
2258  };
2259  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2260    return CheckRegArea(Reg, SpillArea::GPRCS2);
2261  };
2262  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2263    return CheckRegArea(Reg, SpillArea::DPRCS1);
2264  };
2265  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2266    return CheckRegArea(Reg, SpillArea::GPRCS3);
2267  };
2268
2269  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
2270  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
2271  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
2272  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
2273  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
2274
2275  // The code above does not insert spill code for the aligned DPRCS2 registers.
2276  // The stack realignment code will be inserted between the push instructions
2277  // and these spills.
2278  if (NumAlignedDPRCS2Regs)
2279    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2280
2281  return true;
2282}
2283
// NOTE(review): doxygen dump — original lines 2284-2286 (the
// ARMFrameLowering::restoreCalleeSavedRegisters signature) plus 2291 and 2296
// are missing from this extraction; numbered lines below are verbatim.
// Restores the callee-saved registers in CSI. The pop sequence is the exact
// reverse of spillCalleeSavedRegisters: GPRCS3, DPRCS1, FP status, GPRCS2,
// GPRCS1, with the aligned DPRCS2 reloads emitted first (while the stack is
// still aligned). Returns true (restoring handled).
2287  if (CSI.empty())
2288    return false;
2289
2290  MachineFunction &MF = *MBB.getParent();
2292  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2293
2294  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2295  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2297      STI.getPushPopSplitVariation(MF);
2298
2299  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2300  // registers. Do that here instead.
2301  if (NumAlignedDPRCS2Regs)
2302    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2303
2304  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2305  unsigned LdrOpc =
2306      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2307  unsigned FltOpc = ARM::VLDMDIA_UPD;
2308
  // Same spill-area classification predicates as on the spill side.
2309  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2310                       RegInfo](unsigned Reg, SpillArea TestArea) {
2311    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2312           TestArea;
2313  };
2314  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2315    return CheckRegArea(Reg, SpillArea::GPRCS1);
2316  };
2317  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2318    return CheckRegArea(Reg, SpillArea::GPRCS2);
2319  };
2320  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2321    return CheckRegArea(Reg, SpillArea::DPRCS1);
2322  };
2323  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2324    return CheckRegArea(Reg, SpillArea::GPRCS3);
2325  };
2326
2327  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
2328  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
2329  emitFPStatusRestores(MBB, MI, CSI, PopOpc);
2330  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
2331  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
2332
2333  return true;
2334}
2335
2336// FIXME: Make generic?
// NOTE(review): original line 2337 (the first line of the static function
// signature) is missing from this doxygen dump; the body is verbatim.
// Estimates the function's code size in bytes: the sum of per-instruction
// sizes, plus 4 bytes per jump-table entry, plus 4 bytes per constant-pool
// entry. Used to decide whether far-jump handling (LR spill) is needed.
2338                                            const ARMBaseInstrInfo &TII) {
2339  unsigned FnSize = 0;
2340  for (auto &MBB : MF) {
2341    for (auto &MI : MBB)
2342      FnSize += TII.getInstSizeInBytes(MI);
2343  }
2344  if (MF.getJumpTableInfo())
2345    for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2346      FnSize += Table.MBBs.size() * 4;
2347  FnSize += MF.getConstantPool()->getConstants().size() * 4;
2348  LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2349                    << FnSize << " bytes\n");
2350  return FnSize;
2351}
2352
2353/// estimateRSStackSizeLimit - Look at each instruction that references stack
2354/// frames and return the stack size limit beyond which some of these
2355/// instructions will require a scratch register during their expansion later.
2356// FIXME: Move to TII?
// NOTE(review): doxygen dump — original line 2357 (start of the signature)
// and several `case ARMII::...` labels (orig lines 2390, 2395, 2398,
// 2402-2403, 2406, 2417, 2420, 2423) are missing from this extraction, so
// some limit computations below have lost their case labels. The numbered
// lines are verbatim; restore the labels from upstream LLVM.
2358                                         const TargetFrameLowering *TFI,
2359                                         bool &HasNonSPFrameIndex) {
2360  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2361  const ARMBaseInstrInfo &TII =
2362      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  // Start at the widest immediate (12 bits) and shrink per addressing mode.
2363  unsigned Limit = (1 << 12) - 1;
2364  for (auto &MBB : MF) {
2365    for (auto &MI : MBB) {
2366      if (MI.isDebugInstr())
2367        continue;
2368      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2369        if (!MI.getOperand(i).isFI())
2370          continue;
2371
2372        // When using ADDri to get the address of a stack object, 255 is the
2373        // largest offset guaranteed to fit in the immediate offset.
2374        if (MI.getOpcode() == ARM::ADDri) {
2375          Limit = std::min(Limit, (1U << 8) - 1);
2376          break;
2377        }
2378        // t2ADDri will not require an extra register, it can reuse the
2379        // destination.
2380        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2381          break;
2382
2383        const MCInstrDesc &MCID = MI.getDesc();
2384        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
2385        if (RegClass && !RegClass->contains(ARM::SP))
2386          HasNonSPFrameIndex = true;
2387
2388        // Otherwise check the addressing mode.
2389        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2391        case ARMII::AddrMode2:
2392          // Default 12 bit limit.
2393          break;
2394        case ARMII::AddrMode3:
2396          Limit = std::min(Limit, (1U << 8) - 1);
2397          break;
2399          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2400          break;
2401        case ARMII::AddrMode5:
2404          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2405          break;
2407          // i12 supports only positive offset so these will be converted to
2408          // i8 opcodes. See llvm::rewriteT2FrameIndex.
2409          if (TFI->hasFP(MF) && AFI->hasStackFrame())
2410            Limit = std::min(Limit, (1U << 8) - 1);
2411          break;
2412        case ARMII::AddrMode4:
2413        case ARMII::AddrMode6:
2414          // Addressing modes 4 & 6 (load/store) instructions can't encode an
2415          // immediate offset for stack references.
2416          return 0;
2418          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2419          break;
2421          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2422          break;
2424          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2425          break;
2426        default:
2427          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2428        }
2429        break; // At most one FI per instruction
2430      }
2431    }
2432  }
2433
2434  return Limit;
2435}
2436
2437// In functions that realign the stack, it can be an advantage to spill the
2438// callee-saved vector registers after realigning the stack. The vst1 and vld1
2439// instructions take alignment hints that can improve performance.
// NOTE(review): doxygen dump — original lines 2441 (rest of the signature),
// 2443, 2455 and 2460 (the conditions of the early-return `if`s on alignment
// and stack-realignment support) are missing from this extraction; numbered
// lines are verbatim.
// Decides how many d8..d15 callee saves (if any) are spilled to the aligned
// DPRCS2 area, records the count in ARMFunctionInfo, and reserves r4 as the
// scratch register required by the vst1/vld1 sequences.
2440static void
2442  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2444    return;
2445
2446  // Naked functions don't spill callee-saved registers.
2447  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2448    return;
2449
2450  // We are planning to use NEON instructions vst1 / vld1.
2451  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2452    return;
2453
2454  // Don't bother if the default stack alignment is sufficiently high.
2456    return;
2457
2458  // Aligned spills require stack realignment.
2459  if (!static_cast<const ARMBaseRegisterInfo *>(
2461    return;
2462
2463  // We always spill contiguous d-registers starting from d8. Count how many
2464  // needs spilling. The register allocator will almost always use the
2465  // callee-saved registers in order, but it can happen that there are holes in
2466  // the range. Registers above the hole will be spilled to the standard DPRCS
2467  // area.
2468  unsigned NumSpills = 0;
2469  for (; NumSpills < 8; ++NumSpills)
2470    if (!SavedRegs.test(ARM::D8 + NumSpills))
2471      break;
2472
2473  // Don't do this for just one d-register. It's not worth it.
2474  if (NumSpills < 2)
2475    return;
2476
2477  // Spill the first NumSpills D-registers after realigning the stack.
2478  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2479
2480  // A scratch register is required for the vst1 / vld1 instructions.
2481  SavedRegs.set(ARM::R4);
2482}
2483
// NOTE(review): doxygen dump — original lines 2484 (the
// ARMFrameLowering::enableShrinkWrapping signature), 2488 and 2494 (the
// remaining parts of both `if` conditions) are missing from this extraction;
// numbered lines are verbatim.
// Returns false (disable shrink-wrapping) for v8.1-M CMSE entry functions and
// when return-address signing is in effect; true otherwise.
2485  // For CMSE entry functions, we want to save the FPCXT_NS immediately
2486  // upon function entry (resp. restore it immediately before return)
2487  if (STI.hasV8_1MMainlineOps() &&
2489    return false;
2490
2491  // We are disabling shrinkwrapping for now when PAC is enabled, as
2492  // shrinkwrapping can cause clobbering of r12 when the PAC code is
2493  // generated. A follow-up patch will fix this in a more performant manner.
2495                               true /* SpillsLR */))
2496    return false;
2497
2498  return true;
2499}
2500
// NOTE(review): original line 2501 (the start of the
// ARMFrameLowering::requiresAAPCSFrameRecord signature) is missing from this
// doxygen dump; the body is verbatim.
// True when the subtarget requests an AAPCS frame chain and this function
// actually has a frame pointer — i.e. a full fp/lr frame record is required.
2502    const MachineFunction &MF) const {
2503  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2504  return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2505}
2506
2507// Thumb1 may require a spill when storing to a frame index through FP (or any
2508// access with execute-only), for cases where FP is a high register (R11). This
2509// scans the function for cases where this may happen.
// NOTE(review): original line 2510 (start of this static helper's signature)
// is missing from the doxygen dump; the body below is verbatim. Returns true
// if any frame-index access in a Thumb1-only function resolves to a base
// register in the high-GPR class (other than SP), which tSTRspi/tSTRi cannot
// address directly.
2511                                       const TargetFrameLowering &TFI) {
2512  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2513  if (!AFI->isThumb1OnlyFunction())
2514    return false;
2515
2516  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2517  for (const auto &MBB : MF)
2518    for (const auto &MI : MBB)
2519      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2520          STI.genExecuteOnly())
2521        for (const auto &Op : MI.operands())
2522          if (Op.isFI()) {
2523            Register Reg;
            // getFrameIndexReference reports (via Reg) which base register
            // the access would ultimately use; only that is inspected here.
2524            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2525            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2526              return true;
2527          }
2528  return false;
2529}
2530
2532 BitVector &SavedRegs,
2533 RegScavenger *RS) const {
2535 // This tells PEI to spill the FP as if it is any other callee-save register
2536 // to take advantage the eliminateFrameIndex machinery. This also ensures it
2537 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2538 // to combine multiple loads / stores.
2539 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2541 bool CS1Spilled = false;
2542 bool LRSpilled = false;
2543 unsigned NumGPRSpills = 0;
2544 unsigned NumFPRSpills = 0;
2545 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2546 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2547 const Function &F = MF.getFunction();
2548 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2549 MF.getSubtarget().getRegisterInfo());
2550 const ARMBaseInstrInfo &TII =
2551 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2553 MachineFrameInfo &MFI = MF.getFrameInfo();
2554 MachineRegisterInfo &MRI = MF.getRegInfo();
2556 (void)TRI; // Silence unused warning in non-assert builds.
2557 Register FramePtr = STI.getFramePointerReg();
2559 STI.getPushPopSplitVariation(MF);
2560
2561 // For a floating point interrupt, save these registers always, since LLVM
2562 // currently doesn't model reads/writes to these registers.
2563 if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) {
2564 SavedRegs.set(ARM::FPSCR);
2565 SavedRegs.set(ARM::R4);
2566
2567 // This register will only be present on non-MClass registers.
2568 if (STI.isMClass()) {
2569 SavedRegs.reset(ARM::FPEXC);
2570 } else {
2571 SavedRegs.set(ARM::FPEXC);
2572 SavedRegs.set(ARM::R5);
2573 }
2574 }
2575
2576 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2577 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2578 // since it's not always possible to restore sp from fp in a single
2579 // instruction.
2580 // FIXME: It will be better just to find spare register here.
2581 if (AFI->isThumb2Function() &&
2582 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2583 SavedRegs.set(ARM::R4);
2584
2585 // If a stack probe will be emitted, spill R4 and LR, since they are
2586 // clobbered by the stack probe call.
2587 // This estimate should be a safe, conservative estimate. The actual
2588 // stack probe is enabled based on the size of the local objects;
2589 // this estimate also includes the varargs store size.
2590 if (STI.isTargetWindows() &&
2591 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2592 SavedRegs.set(ARM::R4);
2593 SavedRegs.set(ARM::LR);
2594 }
2595
2596 if (AFI->isThumb1OnlyFunction()) {
2597 // Spill LR if Thumb1 function uses variable length argument lists.
2598 if (AFI->getArgRegsSaveSize() > 0)
2599 SavedRegs.set(ARM::LR);
2600
2601 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2602 // requires stack alignment. We don't know for sure what the stack size
2603 // will be, but for this, an estimate is good enough. If there anything
2604 // changes it, it'll be a spill, which implies we've used all the registers
2605 // and so R4 is already used, so not marking it here will be OK.
2606 // FIXME: It will be better just to find spare register here.
2607 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2608 MFI.estimateStackSize(MF) > 508)
2609 SavedRegs.set(ARM::R4);
2610 }
2611
2612 // See if we can spill vector registers to aligned stack.
2613 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2614
2615 // Spill the BasePtr if it's used.
2616 if (RegInfo->hasBasePointer(MF))
2617 SavedRegs.set(RegInfo->getBaseRegister());
2618
2619 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2620 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2621 CanEliminateFrame = false;
2622
2623 // When return address signing is enabled R12 is treated as callee-saved.
2624 if (AFI->shouldSignReturnAddress())
2625 CanEliminateFrame = false;
2626
2627 // Don't spill FP if the frame can be eliminated. This is determined
2628 // by scanning the callee-save registers to see if any is modified.
2629 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2630 for (unsigned i = 0; CSRegs[i]; ++i) {
2631 unsigned Reg = CSRegs[i];
2632 bool Spilled = false;
2633 if (SavedRegs.test(Reg)) {
2634 Spilled = true;
2635 CanEliminateFrame = false;
2636 }
2637
2638 if (!ARM::GPRRegClass.contains(Reg)) {
2639 if (Spilled) {
2640 if (ARM::SPRRegClass.contains(Reg))
2641 NumFPRSpills++;
2642 else if (ARM::DPRRegClass.contains(Reg))
2643 NumFPRSpills += 2;
2644 else if (ARM::QPRRegClass.contains(Reg))
2645 NumFPRSpills += 4;
2646 }
2647 continue;
2648 }
2649
2650 if (Spilled) {
2651 NumGPRSpills++;
2652
2653 if (PushPopSplit != ARMSubtarget::SplitR7) {
2654 if (Reg == ARM::LR)
2655 LRSpilled = true;
2656 CS1Spilled = true;
2657 continue;
2658 }
2659
2660 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2661 switch (Reg) {
2662 case ARM::LR:
2663 LRSpilled = true;
2664 [[fallthrough]];
2665 case ARM::R0: case ARM::R1:
2666 case ARM::R2: case ARM::R3:
2667 case ARM::R4: case ARM::R5:
2668 case ARM::R6: case ARM::R7:
2669 CS1Spilled = true;
2670 break;
2671 default:
2672 break;
2673 }
2674 } else {
2675 if (PushPopSplit != ARMSubtarget::SplitR7) {
2676 UnspilledCS1GPRs.push_back(Reg);
2677 continue;
2678 }
2679
2680 switch (Reg) {
2681 case ARM::R0: case ARM::R1:
2682 case ARM::R2: case ARM::R3:
2683 case ARM::R4: case ARM::R5:
2684 case ARM::R6: case ARM::R7:
2685 case ARM::LR:
2686 UnspilledCS1GPRs.push_back(Reg);
2687 break;
2688 default:
2689 UnspilledCS2GPRs.push_back(Reg);
2690 break;
2691 }
2692 }
2693 }
2694
2695 bool ForceLRSpill = false;
2696 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2697 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2698 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2699 // use of BL to implement far jump.
2700 if (FnSize >= (1 << 11)) {
2701 CanEliminateFrame = false;
2702 ForceLRSpill = true;
2703 }
2704 }
2705
2706 // If any of the stack slot references may be out of range of an immediate
2707 // offset, make sure a register (or a spill slot) is available for the
2708 // register scavenger. Note that if we're indexing off the frame pointer, the
2709 // effective stack size is 4 bytes larger since the FP points to the stack
2710 // slot of the previous FP. Also, if we have variable sized objects in the
2711 // function, stack slot references will often be negative, and some of
2712 // our instructions are positive-offset only, so conservatively consider
2713 // that case to want a spill slot (or register) as well. Similarly, if
2714 // the function adjusts the stack pointer during execution and the
2715 // adjustments aren't already part of our stack size estimate, our offset
2716 // calculations may be off, so be conservative.
2717 // FIXME: We could add logic to be more precise about negative offsets
2718 // and which instructions will need a scratch register for them. Is it
2719 // worth the effort and added fragility?
2720 unsigned EstimatedStackSize =
2721 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2722
2723 // Determine biggest (positive) SP offset in MachineFrameInfo.
2724 int MaxFixedOffset = 0;
2725 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2726 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2727 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2728 }
2729
2730 bool HasFP = hasFP(MF);
2731 if (HasFP) {
2732 if (AFI->hasStackFrame())
2733 EstimatedStackSize += 4;
2734 } else {
2735 // If FP is not used, SP will be used to access arguments, so count the
2736 // size of arguments into the estimation.
2737 EstimatedStackSize += MaxFixedOffset;
2738 }
2739 EstimatedStackSize += 16; // For possible paddings.
2740
2741 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2742 bool HasNonSPFrameIndex = false;
2743 if (AFI->isThumb1OnlyFunction()) {
2744 // For Thumb1, don't bother to iterate over the function. The only
2745 // instruction that requires an emergency spill slot is a store to a
2746 // frame index.
2747 //
2748 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2749 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2750 // a 5-bit unsigned immediate.
2751 //
2752 // We could try to check if the function actually contains a tSTRspi
2753 // that might need the spill slot, but it's not really important.
2754 // Functions with VLAs or extremely large call frames are rare, and
2755 // if a function is allocating more than 1KB of stack, an extra 4-byte
2756 // slot probably isn't relevant.
2757 //
2758 // A special case is the scenario where r11 is used as FP, where accesses
2759 // to a frame index will require its value to be moved into a low reg.
2760 // This is handled later on, once we are able to determine if we have any
2761 // fp-relative accesses.
2762 if (RegInfo->hasBasePointer(MF))
2763 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2764 else
2765 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2766 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2767 } else {
2768 EstimatedRSStackSizeLimit =
2769 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2770 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2771 }
2772 // Final estimate of whether sp or bp-relative accesses might require
2773 // scavenging.
2774 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2775
2776 // If the stack pointer moves and we don't have a base pointer, the
2777 // estimate logic doesn't work. The actual offsets might be larger when
2778 // we're constructing a call frame, or we might need to use negative
2779 // offsets from fp.
2780 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2781 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2782 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2783
2784 // If we have a frame pointer, we assume arguments will be accessed
2785 // relative to the frame pointer. Check whether fp-relative accesses to
2786 // arguments require scavenging.
2787 //
2788 // We could do slightly better on Thumb1; in some cases, an sp-relative
2789 // offset would be legal even though an fp-relative offset is not.
2790 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2791 bool HasLargeArgumentList =
2792 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2793
2794 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2795 HasLargeArgumentList || HasNonSPFrameIndex;
2796 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2797 << "; EstimatedStack: " << EstimatedStackSize
2798 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2799 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2800 if (BigFrameOffsets ||
2801 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2802 AFI->setHasStackFrame(true);
2803
2804 // Save the FP if:
2805 // 1. We currently need it (HasFP), OR
2806 // 2. We might need it later due to stack realignment from aligned DPRCS2
2807 // saves (which will make hasFP() become true in emitPrologue).
2808 if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
2809 SavedRegs.set(FramePtr);
2810 // If the frame pointer is required by the ABI, also spill LR so that we
2811 // emit a complete frame record.
2812 if ((requiresAAPCSFrameRecord(MF) ||
2814 !LRSpilled) {
2815 SavedRegs.set(ARM::LR);
2816 LRSpilled = true;
2817 NumGPRSpills++;
2818 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2819 if (LRPos != UnspilledCS1GPRs.end())
2820 UnspilledCS1GPRs.erase(LRPos);
2821 }
2822 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2823 if (FPPos != UnspilledCS1GPRs.end())
2824 UnspilledCS1GPRs.erase(FPPos);
2825 NumGPRSpills++;
2826 if (FramePtr == ARM::R7)
2827 CS1Spilled = true;
2828 }
2829
2830 // This is the number of extra spills inserted for callee-save GPRs which
2831 // would not otherwise be used by the function. When greater than zero it
2832 // guaranteees that it is possible to scavenge a register to hold the
2833 // address of a stack slot. On Thumb1, the register must be a valid operand
2834 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2835 // or lr.
2836 //
2837 // If we don't insert a spill, we instead allocate an emergency spill
2838 // slot, which can be used by scavenging to spill an arbitrary register.
2839 //
2840 // We currently don't try to figure out whether any specific instruction
2841 // requires scavening an additional register.
2842 unsigned NumExtraCSSpill = 0;
2843
2844 if (AFI->isThumb1OnlyFunction()) {
2845 // For Thumb1-only targets, we need some low registers when we save and
2846 // restore the high registers (which aren't allocatable, but could be
2847 // used by inline assembly) because the push/pop instructions can not
2848 // access high registers. If necessary, we might need to push more low
2849 // registers to ensure that there is at least one free that can be used
2850 // for the saving & restoring, and preferably we should ensure that as
2851 // many as are needed are available so that fewer push/pop instructions
2852 // are required.
2853
2854 // Low registers which are not currently pushed, but could be (r4-r7).
2855 SmallVector<unsigned, 4> AvailableRegs;
2856
2857 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2858 // free.
2859 int EntryRegDeficit = 0;
2860 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2861 if (!MF.getRegInfo().isLiveIn(Reg)) {
2862 --EntryRegDeficit;
2864 << printReg(Reg, TRI)
2865 << " is unused argument register, EntryRegDeficit = "
2866 << EntryRegDeficit << "\n");
2867 }
2868 }
2869
2870 // Unused return registers can be clobbered in the epilogue for free.
2871 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2873 << " return regs used, ExitRegDeficit = "
2874 << ExitRegDeficit << "\n");
2875
2876 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2877 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2878
2879 // r4-r6 can be used in the prologue if they are pushed by the first push
2880 // instruction.
2881 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2882 if (SavedRegs.test(Reg)) {
2883 --RegDeficit;
2884 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2885 << " is saved low register, RegDeficit = "
2886 << RegDeficit << "\n");
2887 } else {
2888 AvailableRegs.push_back(Reg);
2889 LLVM_DEBUG(
2890 dbgs()
2891 << printReg(Reg, TRI)
2892 << " is non-saved low register, adding to AvailableRegs\n");
2893 }
2894 }
2895
2896 // r7 can be used if it is not being used as the frame pointer.
2897 if (!HasFP || FramePtr != ARM::R7) {
2898 if (SavedRegs.test(ARM::R7)) {
2899 --RegDeficit;
2900 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2901 << RegDeficit << "\n");
2902 } else {
2903 AvailableRegs.push_back(ARM::R7);
2904 LLVM_DEBUG(
2905 dbgs()
2906 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2907 }
2908 }
2909
2910 // Each of r8-r11 needs to be copied to a low register, then pushed.
2911 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2912 if (SavedRegs.test(Reg)) {
2913 ++RegDeficit;
2914 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2915 << " is saved high register, RegDeficit = "
2916 << RegDeficit << "\n");
2917 }
2918 }
2919
2920 // LR can only be used by PUSH, not POP, and can't be used at all if the
2921 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2922 // are more limited at function entry than exit.
2923 if ((EntryRegDeficit > ExitRegDeficit) &&
2924 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2926 if (SavedRegs.test(ARM::LR)) {
2927 --RegDeficit;
2928 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2929 << RegDeficit << "\n");
2930 } else {
2931 AvailableRegs.push_back(ARM::LR);
2932 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2933 }
2934 }
2935
2936 // If there are more high registers that need pushing than low registers
2937 // available, push some more low registers so that we can use fewer push
2938 // instructions. This might not reduce RegDeficit all the way to zero,
2939 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2940 // need saving.
2941 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2942 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2943 unsigned Reg = AvailableRegs.pop_back_val();
2944 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2945 << " to make up reg deficit\n");
2946 SavedRegs.set(Reg);
2947 NumGPRSpills++;
2948 CS1Spilled = true;
2949 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2950 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2951 NumExtraCSSpill++;
2952 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2953 if (Reg == ARM::LR)
2954 LRSpilled = true;
2955 }
2956 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2957 << "\n");
2958 }
2959
2960 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2961 // restore LR in that case.
2962 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2963
2964 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2965 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2966 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2967 SavedRegs.set(ARM::LR);
2968 NumGPRSpills++;
2970 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2971 if (LRPos != UnspilledCS1GPRs.end())
2972 UnspilledCS1GPRs.erase(LRPos);
2973
2974 ForceLRSpill = false;
2975 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2976 !AFI->isThumb1OnlyFunction())
2977 NumExtraCSSpill++;
2978 }
2979
2980 // If stack and double are 8-byte aligned and we are spilling an odd number
2981 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2982 // the integer and double callee save areas.
2983 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2984 const Align TargetAlign = getStackAlign();
2985 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2986 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2987 for (unsigned Reg : UnspilledCS1GPRs) {
2988 // Don't spill high register if the function is thumb. In the case of
2989 // Windows on ARM, accept R11 (frame pointer)
2990 if (!AFI->isThumbFunction() ||
2991 (STI.isTargetWindows() && Reg == ARM::R11) ||
2992 isARMLowRegister(Reg) ||
2993 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2994 SavedRegs.set(Reg);
2995 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2996 << " to make up alignment\n");
2997 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2998 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2999 NumExtraCSSpill++;
3000 break;
3001 }
3002 }
3003 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
3004 unsigned Reg = UnspilledCS2GPRs.front();
3005 SavedRegs.set(Reg);
3006 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
3007 << " to make up alignment\n");
3008 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
3009 NumExtraCSSpill++;
3010 }
3011 }
3012
3013 // Estimate if we might need to scavenge registers at some point in order
3014 // to materialize a stack offset. If so, either spill one additional
3015 // callee-saved register or reserve a special spill slot to facilitate
3016 // register scavenging. Thumb1 needs a spill slot for stack pointer
3017 // adjustments and for frame index accesses when FP is high register,
3018 // even when the frame itself is small.
3019 unsigned RegsNeeded = 0;
3020 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
3021 RegsNeeded++;
3022 // With thumb1 execute-only we may need an additional register for saving
3023 // and restoring the CPSR.
3024 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
3025 RegsNeeded++;
3026 }
3027
3028 if (RegsNeeded > NumExtraCSSpill) {
3029 // If any non-reserved CS register isn't spilled, just spill one or two
3030 // extra. That should take care of it!
3031 unsigned NumExtras = TargetAlign.value() / 4;
3033 while (NumExtras && !UnspilledCS1GPRs.empty()) {
3034 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
3035 if (!MRI.isReserved(Reg) &&
3036 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
3037 Extras.push_back(Reg);
3038 NumExtras--;
3039 }
3040 }
3041 // For non-Thumb1 functions, also check for hi-reg CS registers
3042 if (!AFI->isThumb1OnlyFunction()) {
3043 while (NumExtras && !UnspilledCS2GPRs.empty()) {
3044 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
3045 if (!MRI.isReserved(Reg)) {
3046 Extras.push_back(Reg);
3047 NumExtras--;
3048 }
3049 }
3050 }
3051 if (NumExtras == 0) {
3052 for (unsigned Reg : Extras) {
3053 SavedRegs.set(Reg);
3054 if (!MRI.isPhysRegUsed(Reg))
3055 NumExtraCSSpill++;
3056 }
3057 }
3058 while ((RegsNeeded > NumExtraCSSpill) && RS) {
3059 // Reserve a slot closest to SP or frame pointer.
3060 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
3061 const TargetRegisterClass &RC = ARM::GPRRegClass;
3062 unsigned Size = TRI->getSpillSize(RC);
3063 Align Alignment = TRI->getSpillAlign(RC);
3064 RS->addScavengingFrameIndex(
3065 MFI.CreateSpillStackObject(Size, Alignment));
3066 --RegsNeeded;
3067 }
3068 }
3069 }
3070
3071 if (ForceLRSpill)
3072 SavedRegs.set(ARM::LR);
3073 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
3074}
3075
3077 MachineFrameInfo &MFI = MF.getFrameInfo();
3078 if (!MFI.isCalleeSavedInfoValid())
3079 return;
3080
3081 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3082 // into PC so it is not live out of the return block: Clear the Restored bit
3083 // in that case.
3084 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3085 if (Info.getReg() != ARM::LR)
3086 continue;
3087 if (all_of(MF, [](const MachineBasicBlock &MBB) {
3088 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
3089 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3090 Term.getOpcode() == ARM::t2LDMIA_RET ||
3091 Term.getOpcode() == ARM::tPOP_RET;
3092 });
3093 })) {
3094 Info.setRestored(false);
3095 break;
3096 }
3097 }
3098}
3099
3105
3107 BitVector &SavedRegs) const {
3109
3110 // If we have the "returned" parameter attribute which guarantees that we
3111 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3112 // record that fact for IPRA.
3113 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3114 if (AFI->getPreservesR0())
3115 SavedRegs.set(ARM::R0);
3116}
3117
// NOTE(review): this is presumably ARMFrameLowering::assignCalleeSavedSpillSlots;
// the extraction that produced this text dropped the signature lines (doc lines
// 3118-3119) and several hyperlinked lines below. The leading "31xx" numbers on
// each line are embedded doc-page line numbers, not program text. Restore the
// dropped lines from upstream before compiling.
3120 std::vector<CalleeSavedInfo> &CSI) const {
3121 // For CMSE entry functions, handle floating-point context as if it was a
3122 // callee-saved register.
3123 if (STI.hasV8_1MMainlineOps() &&
// NOTE(review): doc line 3124 (the second operand of this &&, presumably the
// CMSE-entry-function test) was dropped -- confirm against upstream.
3125 CSI.emplace_back(ARM::FPCXTNS);
3126 CSI.back().setRestored(false);
3127 }
3128
3129 // For functions, which sign their return address, upon function entry, the
3130 // return address PAC is computed in R12. Treat R12 as a callee-saved register
3131 // in this case.
3132 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
3133 if (AFI.shouldSignReturnAddress()) {
3134 // The order of register must match the order we push them, because the
3135 // PEI assigns frame indices in that order. That order depends on the
3136 // PushPopSplitVariation, there are only two cases which we use with return
3137 // address signing:
3138 switch (STI.getPushPopSplitVariation(MF)) {
// NOTE(review): the case label at doc line 3139 was dropped; per the comment
// below it is the split-R7 variation (R12 inserted before the high registers
// and D-registers so it sits between the two GPR push areas).
3140 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
3141 CSI.insert(find_if(CSI,
3142 [=](const auto &CS) {
3143 MCRegister Reg = CS.getReg();
3144 return Reg == ARM::R10 || Reg == ARM::R11 ||
3145 Reg == ARM::R8 || Reg == ARM::R9 ||
3146 ARM::DPRRegClass.contains(Reg);
3147 }),
3148 CalleeSavedInfo(ARM::R12));
3149 break;
// NOTE(review): case label at doc line 3150 dropped; the comment names the
// SplitR11AAPCSSignRA variation.
3151 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3152 // on the stack.
3153 CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3154 break;
// NOTE(review): doc lines 3155-3156 (a case label plus the opening of an
// assert whose message continues below) were dropped -- confirm upstream.
3157 "ABI-required frame pointers need a CSR split when signing return "
3158 "address.");
// R12 is inserted immediately after LR so it is pushed together with LR.
3159 CSI.insert(find_if(CSI,
3160 [=](const auto &CS) {
3161 MCRegister Reg = CS.getReg();
3162 return Reg != ARM::LR;
3163 }),
3164 CalleeSavedInfo(ARM::R12));
3165 break;
3166 default:
3167 llvm_unreachable("Unexpected CSR split with return address signing");
3168 }
3169 }
3170
3171 return false;
3172}
3173
3176 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
3177 NumEntries = std::size(FixedSpillOffsets);
3178 return FixedSpillOffsets;
3179}
3180
3181MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3184 const ARMBaseInstrInfo &TII =
3185 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3187 bool isARM = !AFI->isThumbFunction();
3188 DebugLoc dl = I->getDebugLoc();
3189 unsigned Opc = I->getOpcode();
3190 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
3191 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
3192
3193 assert(!AFI->isThumb1OnlyFunction() &&
3194 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3195
3196 int PIdx = I->findFirstPredOperandIdx();
3197 ARMCC::CondCodes Pred = (PIdx == -1)
3198 ? ARMCC::AL
3199 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
3200 unsigned PredReg = TII.getFramePred(*I);
3201
3202 if (!hasReservedCallFrame(MF)) {
3203 // Bail early if the callee is expected to do the adjustment.
3204 if (IsDestroy && CalleePopAmount != -1U)
3205 return MBB.erase(I);
3206
3207 // If we have alloca, convert as follows:
3208 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3209 // ADJCALLSTACKUP -> add, sp, sp, amount
3210 unsigned Amount = TII.getFrameSize(*I);
3211 if (Amount != 0) {
3212 // We need to keep the stack aligned properly. To do this, we round the
3213 // amount of space needed for the outgoing arguments up to the next
3214 // alignment boundary.
3215 Amount = alignSPAdjust(Amount);
3216
3217 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3218 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
3219 Pred, PredReg);
3220 } else {
3221 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3222 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
3223 Pred, PredReg);
3224 }
3225 }
3226 } else if (CalleePopAmount != -1U) {
3227 // If the calling convention demands that the callee pops arguments from the
3228 // stack, we want to add it back if we have a reserved call frame.
3229 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
3230 MachineInstr::NoFlags, Pred, PredReg);
3231 }
3232 return MBB.erase(I);
3233}
3234
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left by an even amount until the significant bits sit
  // at the top of the word, so the value's leading byte lands in [31:24].
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round the top byte up if any lower bits would otherwise be truncated.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If rounding carried into bit 8, the value no longer fits in 8 bits;
  // drop the low bits so it remains representable after the rotate.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization shift (the byte was moved up by 24 - Shifted).
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
3263
3264// The stack limit in the TCB is set to this many bytes above the actual
3265// stack limit.
3267
3268// Adjust the function prologue to enable split stacks. This currently only
3269// supports android and linux.
3270//
3271// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3272// must be well defined in order to allow for consistent implementations of the
3273// __morestack helper function. The ABI is also not a normal ABI in that it
3274// doesn't follow the normal calling conventions because this allows the
3275// prologue of each function to be optimized further.
3276//
3277// Currently, the ABI looks like (when calling __morestack)
3278//
3279// * r4 holds the minimum stack size requested for this function call
3280// * r5 holds the stack size of the arguments to the function
3281// * the beginning of the function is 3 instructions after the call to
3282// __morestack
3283//
3284// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3285// place the arguments on to the new stack, and the 3-instruction knowledge to
3286// jump directly to the body of the function when working on the new stack.
3287//
3288// An old (and possibly no longer compatible) implementation of __morestack for
3289// ARM can be found at [1].
3290//
3291// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// NOTE(review): this is the body of ARMFrameLowering::adjustForSegmentedStacks;
// the extraction dropped the signature line (doc line 3292) and many other
// hyperlinked lines -- among them, presumably, the creations of McrMBB, GetMBB
// and AllocMBB (doc lines 3326-3328), the declaration of ARMFI (doc line 3309),
// and numerous predOps(ARMCC::AL)/RegState continuation lines. The leading
// "3xxx" numbers are embedded doc-page line numbers, not program text. Restore
// the dropped lines from upstream before compiling.
3293 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3294 unsigned Opcode;
3295 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
3296 bool Thumb = ST->isThumb();
3297 bool Thumb2 = ST->isThumb2();
3298
3299 // Sadly, this currently doesn't support varargs, platforms other than
3300 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
3301 if (MF.getFunction().isVarArg())
3302 report_fatal_error("Segmented stacks do not support vararg functions.");
3303 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
3304 report_fatal_error("Segmented stacks not supported on this platform.");
3305
3306 MachineFrameInfo &MFI = MF.getFrameInfo();
3307 const ARMBaseInstrInfo &TII =
3308 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3310 DebugLoc DL;
3311
3312 if (!MFI.needsSplitStackProlog())
3313 return;
3314
3315 uint64_t StackSize = MFI.getStackSize();
3316
3317 // Use R4 and R5 as scratch registers.
3318 // We save R4 and R5 before use and restore them before leaving the function.
3319 unsigned ScratchReg0 = ARM::R4;
3320 unsigned ScratchReg1 = ARM::R5;
3321 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3322 uint64_t AlignedStackSize;
3323
3324 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3325 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3329
3330 // Grab everything that reaches PrologueMBB to update their liveness as well.
3331 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3333 WalkList.push_back(&PrologueMBB);
3334
3335 do {
3336 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3337 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3338 if (BeforePrologueRegion.insert(PredBB).second)
3339 WalkList.push_back(PredBB);
3340 }
3341 } while (!WalkList.empty());
3342
3343 // The order in that list is important.
3344 // The blocks will all be inserted before PrologueMBB using that order.
3345 // Therefore the block that should appear first in the CFG should appear
3346 // first in the list.
3347 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3348 PostStackMBB};
3349
3350 BeforePrologueRegion.insert_range(AddedBlocks);
3351
3352 for (const auto &LI : PrologueMBB.liveins()) {
3353 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3354 PredBB->addLiveIn(LI);
3355 }
3356
3357 // Remove the newly added blocks from the list, since we know
3358 // we do not have to do the following updates for them.
3359 for (MachineBasicBlock *B : AddedBlocks) {
3360 BeforePrologueRegion.erase(B);
3361 MF.insert(PrologueMBB.getIterator(), B);
3362 }
3363
3364 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3365 // Make sure the LiveIns are still sorted and unique.
3366 MBB->sortUniqueLiveIns();
3367 // Replace the edges to PrologueMBB by edges to the sequences
3368 // we are about to add, but only update for immediate predecessors.
3369 if (MBB->isSuccessor(&PrologueMBB))
3370 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3371 }
3372
3373 // The required stack size that is aligned to ARM constant criterion.
3374 AlignedStackSize = alignToARMConstant(StackSize);
3375
3376 // When the frame size is less than 256 we just compare the stack
3377 // boundary directly to the value of the stack pointer, per gcc.
3378 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3379
3380 // We will use two of the callee save registers as scratch registers so we
3381 // need to save those registers onto the stack.
3382 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3383 // requested and arguments for __morestack().
3384 // SR0: Scratch Register #0
3385 // SR1: Scratch Register #1
3386 // push {SR0, SR1}
3387 if (Thumb) {
3388 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3390 .addReg(ScratchReg0)
3391 .addReg(ScratchReg1);
3392 } else {
3393 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3394 .addReg(ARM::SP, RegState::Define)
3395 .addReg(ARM::SP)
3397 .addReg(ScratchReg0)
3398 .addReg(ScratchReg1);
3399 }
3400
3401 // Emit the relevant DWARF information about the change in stack pointer as
3402 // well as where to find both r4 and r5 (the callee-save registers)
3403 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3404 CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
3405 CFIBuilder.buildDefCFAOffset(8);
3406 CFIBuilder.buildOffset(ScratchReg1, -4);
3407 CFIBuilder.buildOffset(ScratchReg0, -8);
3408 }
3409
3410 // mov SR1, sp
3411 if (Thumb) {
3412 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3413 .addReg(ARM::SP)
3415 } else if (CompareStackPointer) {
3416 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3417 .addReg(ARM::SP)
3419 .add(condCodeOp());
3420 }
3421
3422 // sub SR1, sp, #StackSize
3423 if (!CompareStackPointer && Thumb) {
3424 if (AlignedStackSize < 256) {
3425 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3426 .add(condCodeOp())
3427 .addReg(ScratchReg1)
3428 .addImm(AlignedStackSize)
3430 } else {
3431 if (Thumb2 || ST->genExecuteOnly()) {
3432 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3433 .addImm(AlignedStackSize);
3434 } else {
3435 auto MBBI = McrMBB->end();
3436 auto RegInfo = STI.getRegisterInfo();
3437 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3438 AlignedStackSize);
3439 }
3440 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3441 .add(condCodeOp())
3442 .addReg(ScratchReg1)
3443 .addReg(ScratchReg0)
3445 }
3446 } else if (!CompareStackPointer) {
3447 if (AlignedStackSize < 256) {
3448 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3449 .addReg(ARM::SP)
3450 .addImm(AlignedStackSize)
3452 .add(condCodeOp());
3453 } else {
3454 auto MBBI = McrMBB->end();
3455 auto RegInfo = STI.getRegisterInfo();
3456 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3457 AlignedStackSize);
3458 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3459 .addReg(ARM::SP)
3460 .addReg(ScratchReg0)
3462 .add(condCodeOp());
3463 }
3464 }
3465
3466 if (Thumb && ST->isThumb1Only()) {
3467 if (ST->genExecuteOnly()) {
3468 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3469 .addExternalSymbol("__STACK_LIMIT");
3470 } else {
3471 unsigned PCLabelId = ARMFI->createPICLabelUId();
3473 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3475 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3476
3477 // ldr SR0, [pc, offset(STACK_LIMIT)]
3478 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3481 }
3482
3483 // ldr SR0, [SR0]
3484 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3485 .addReg(ScratchReg0)
3486 .addImm(0)
3488 } else {
3489 // Get TLS base address from the coprocessor
3490 // mrc p15, #0, SR0, c13, c0, #3
3491 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3492 ScratchReg0)
3493 .addImm(15)
3494 .addImm(0)
3495 .addImm(13)
3496 .addImm(0)
3497 .addImm(3)
3499
3500 // Use the last tls slot on android and a private field of the TCB on linux.
3501 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3502 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3503
3504 // Get the stack limit from the right offset
3505 // ldr SR0, [sr0, #4 * TlsOffset]
3506 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3507 ScratchReg0)
3508 .addReg(ScratchReg0)
3509 .addImm(4 * TlsOffset)
3511 }
3512
3513 // Compare stack limit with stack size requested.
3514 // cmp SR0, SR1
3515 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3516 BuildMI(GetMBB, DL, TII.get(Opcode))
3517 .addReg(ScratchReg0)
3518 .addReg(ScratchReg1)
3520
3521 // This jump is taken if StackLimit <= SP - stack required.
3522 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3523 BuildMI(GetMBB, DL, TII.get(Opcode))
3524 .addMBB(PostStackMBB)
3526 .addReg(ARM::CPSR);
3527
3528 // Calling __morestack(StackSize, Size of stack arguments).
3529 // __morestack knows that the stack size requested is in SR0(r4)
3530 // and amount size of stack arguments is in SR1(r5).
3531
3532 // Pass first argument for the __morestack by Scratch Register #0.
3533 // The amount size of stack required
3534 if (Thumb) {
3535 if (AlignedStackSize < 256) {
3536 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3537 .add(condCodeOp())
3538 .addImm(AlignedStackSize)
3540 } else {
3541 if (Thumb2 || ST->genExecuteOnly()) {
3542 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3543 .addImm(AlignedStackSize);
3544 } else {
3545 auto MBBI = AllocMBB->end();
3546 auto RegInfo = STI.getRegisterInfo();
3547 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3548 AlignedStackSize);
3549 }
3550 }
3551 } else {
3552 if (AlignedStackSize < 256) {
3553 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3554 .addImm(AlignedStackSize)
3556 .add(condCodeOp());
3557 } else {
3558 auto MBBI = AllocMBB->end();
3559 auto RegInfo = STI.getRegisterInfo();
3560 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3561 AlignedStackSize);
3562 }
3563 }
3564
3565 // Pass second argument for the __morestack by Scratch Register #1.
3566 // The amount size of stack consumed to save function arguments.
3567 if (Thumb) {
3568 if (ARMFI->getArgumentStackSize() < 256) {
3569 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3570 .add(condCodeOp())
3573 } else {
3574 if (Thumb2 || ST->genExecuteOnly()) {
3575 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3577 } else {
3578 auto MBBI = AllocMBB->end();
3579 auto RegInfo = STI.getRegisterInfo();
3580 RegInfo->emitLoadConstPool(
3581 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3583 }
3584 }
3585 } else {
3586 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3587 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3590 .add(condCodeOp());
3591 } else {
3592 auto MBBI = AllocMBB->end();
3593 auto RegInfo = STI.getRegisterInfo();
3594 RegInfo->emitLoadConstPool(
3595 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3597 }
3598 }
3599
3600 // push {lr} - Save return address of this function.
3601 if (Thumb) {
3602 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3604 .addReg(ARM::LR);
3605 } else {
3606 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3607 .addReg(ARM::SP, RegState::Define)
3608 .addReg(ARM::SP)
3610 .addReg(ARM::LR);
3611 }
3612
3613 // Emit the DWARF info about the change in stack as well as where to find the
3614 // previous link register
3615 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3616 CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
3617 CFIBuilder.buildDefCFAOffset(12);
3618 CFIBuilder.buildOffset(ARM::LR, -12);
3619 }
3620
3621 // Call __morestack().
3622 if (Thumb) {
3623 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3625 .addExternalSymbol("__morestack");
3626 } else {
3627 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3628 .addExternalSymbol("__morestack");
3629 }
3630
3631 // pop {lr} - Restore return address of this original function.
3632 if (Thumb) {
3633 if (ST->isThumb1Only()) {
3634 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3636 .addReg(ScratchReg0);
3637 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3638 .addReg(ScratchReg0)
3640 } else {
3641 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3642 .addReg(ARM::LR, RegState::Define)
3643 .addReg(ARM::SP, RegState::Define)
3644 .addReg(ARM::SP)
3645 .addImm(4)
3647 }
3648 } else {
3649 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3650 .addReg(ARM::SP, RegState::Define)
3651 .addReg(ARM::SP)
3653 .addReg(ARM::LR);
3654 }
3655
3656 // Restore SR0 and SR1 in case of __morestack() was called.
3657 // __morestack() will skip PostStackMBB block so we need to restore
3658 // scratch registers from here.
3659 // pop {SR0, SR1}
3660 if (Thumb) {
3661 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3663 .addReg(ScratchReg0)
3664 .addReg(ScratchReg1);
3665 } else {
3666 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3667 .addReg(ARM::SP, RegState::Define)
3668 .addReg(ARM::SP)
3670 .addReg(ScratchReg0)
3671 .addReg(ScratchReg1);
3672 }
3673
3674 // Update the CFA offset now that we've popped
3675 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
3677
3678 // Return from this function.
3679 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3680
3681 // Restore SR0 and SR1 in case of __morestack() was not called.
3682 // pop {SR0, SR1}
3683 if (Thumb) {
3684 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3686 .addReg(ScratchReg0)
3687 .addReg(ScratchReg1);
3688 } else {
3689 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3690 .addReg(ARM::SP, RegState::Define)
3691 .addReg(ARM::SP)
3693 .addReg(ScratchReg0)
3694 .addReg(ScratchReg1);
3695 }
3696
3697 // Update the CFA offset now that we've popped
3698 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3699 CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
3700 CFIBuilder.buildDefCFAOffset(0);
3701
3702 // Tell debuggers that r4 and r5 are now the same as they were in the
3703 // previous function, that they're the "Same Value".
3704 CFIBuilder.buildSameValue(ScratchReg0);
3705 CFIBuilder.buildSameValue(ScratchReg1);
3706 }
3707
3708 // Organizing MBB lists
3709 PostStackMBB->addSuccessor(&PrologueMBB);
3710
3711 AllocMBB->addSuccessor(PostStackMBB);
3712
3713 GetMBB->addSuccessor(PostStackMBB);
3714 GetMBB->addSuccessor(AllocMBB);
3715
3716 McrMBB->addSuccessor(GetMBB);
3717
3718 PrevStackMBB->addSuccessor(McrMBB);
3719
3720#ifdef EXPENSIVE_CHECKS
3721 MF.verify();
3722#endif
3723}
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
static bool needsWinCFI(const MachineFunction *MF)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static int getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
SpillArea getSpillArea(Register Reg, ARMSubtarget::PushPopSplitVariation Variation, unsigned NumAlignedDPRCS2Regs, const ARMBaseRegisterInfo *RegInfo)
Get the spill area that Reg should be saved into in the prologue.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
static const unsigned FramePtr
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
bool keepFramePointer(const MachineFunction &MF) const
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool requiresAAPCSFrameRecord(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
unsigned getDPRCalleeSavedArea1Size() const
void setDPRCalleeSavedArea1Offset(unsigned o)
void setGPRCalleeSavedArea2Size(unsigned s)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getGPRCalleeSavedArea3Size() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedArea1Size(unsigned s)
void setDPRCalleeSavedGapSize(unsigned s)
void setFPStatusSavesSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
void setGPRCalleeSavedArea3Size(unsigned s)
unsigned getFPStatusSavesSize() const
const ARMBaseRegisterInfo * getRegisterInfo() const override
enum PushPopSplitVariation getPushPopSplitVariation(const MachineFunction &MF) const
PushPopSplitVariation
How the push and pop instructions of callee saved general-purpose registers should be split.
@ SplitR11WindowsSEH
When the stack frame size is not known (because of variable-sized objects or realignment),...
@ SplitR7
R7 and LR must be adjacent, because R7 is the frame pointer, and must point to a frame record consist...
@ SplitR11AAPCSSignRA
When generating AAPCS-compliant frame chains, R11 is the frame pointer, and must be pushed adjacent t...
@ NoSplit
All GPRs can be pushed in a single instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
BitVector & set()
Definition BitVector.h:370
Helper class for creating CFI instructions and inserting them into MIR.
void buildDefCFAOffset(int64_t Offset, MCSymbol *Label=nullptr) const
void buildDefCFARegister(MCRegister Reg) const
void buildSameValue(MCRegister Reg) const
void buildOffset(MCRegister Reg, int64_t Offset) const
void buildDefCFA(MCRegister Reg, int64_t Offset) const
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
bool usesWindowsCFI() const
Definition MCAsmInfo.h:655
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
iterator_range< livein_iterator > liveins() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getStackProtectorIndex() const
Return the index for the stack protector object.
int64_t getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
LLVM_ABI BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI bool isLiveIn(Register Reg) const
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool erase(PtrType Ptr)
Remove pointer from the set.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
Primary interface to the complete machine description for the target machine.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
LLVM_ABI bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
LLVM Value Representation.
Definition Value.h:75
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ D16
Only 16 D registers.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77