LLVM 23.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "AMDGPULaneMaskUtils.h"
12#include "GCNSubtarget.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "frame-info"
23
25 "amdgpu-spill-vgpr-to-agpr",
26 cl::desc("Enable spilling VGPRs to AGPRs"),
28 cl::init(true));
29
30// Find a register matching \p RC from \p LiveUnits which is unused and
31// available throughout the function. On failure, returns AMDGPU::NoRegister.
32// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
33// MCRegisters. This should reduce the number of iterations and avoid redundant
34// checking.
// NOTE(review): this excerpt comes from a rendered listing; the first line of
// the signature (function name and the MRI parameter) is not visible here.
36                                     const LiveRegUnits &LiveUnits,
37                                     const TargetRegisterClass &RC) {
38  for (MCRegister Reg : RC) {
// A candidate must be unused anywhere in the function, have all of its
// register units free in LiveUnits, and not be reserved.
39    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
40        !MRI.isReserved(Reg))
41      return Reg;
42  }
// Default-constructed MCRegister is the invalid/no-register value.
43  return MCRegister();
44}
45
46// Find a scratch register that we can use in the prologue. We avoid using
47// callee-save registers since they may appear to be free when this is called
48// from canUseAsPrologue (during shrink wrapping), but then no longer be free
49// when this is called from emitPrologue.
// NOTE(review): the opening line of the signature (function name) is missing
// from this rendered excerpt.
51                                                   MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
52                                                   const TargetRegisterClass &RC, bool Unused = false) {
53  // Mark callee saved registers as used so we will not choose them.
54  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
// CSRegs is a null-terminated array of callee-saved physical registers.
55  for (unsigned i = 0; CSRegs[i]; ++i)
56    LiveUnits.addReg(CSRegs[i]);
57
58  // We are looking for a register that can be used throughout the entire
59  // function, so any use is unacceptable.
60  if (Unused)
61    return findUnusedRegister(MRI, LiveUnits, RC);
62
// Otherwise a register merely free at this point (and not reserved) suffices.
63  for (MCRegister Reg : RC) {
64    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
65      return Reg;
66  }
67
68  return MCRegister();
69}
70
71/// Query target location for spilling SGPRs
72/// \p IncludeScratchCopy : Also look for free scratch SGPRs
// NOTE(review): rendered excerpt — the signature's first line and several
// statements (e.g. the stack ID argument of CreateStackObject, the calls that
// record the chosen spill method) are missing here.
74    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
75    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
76    bool IncludeScratchCopy = true) {
78  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
79
80  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
81  const SIRegisterInfo *TRI = ST.getRegisterInfo();
82  unsigned Size = TRI->getSpillSize(RC);
83  Align Alignment = TRI->getSpillAlign(RC);
84
85  // We need to save and restore the given SGPR.
86
87  Register ScratchSGPR;
88  // 1: Try to save the given register into an unused scratch SGPR. The
89  // LiveUnits should have all the callee saved registers marked as used. For
90  // certain cases we skip copy to scratch SGPR.
91  if (IncludeScratchCopy)
92    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
93
94  if (!ScratchSGPR) {
// No free SGPR: fall back to a frame index, preferring a VGPR lane spill.
95    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
97
98    if (TRI->spillSGPRToVGPR() &&
99        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
100                                         /*IsPrologEpilog=*/true)) {
101      // 2: There's no free lane to spill, and no free register to save the
102      // SGPR, so we're forced to take another VGPR to use for the spill.
106
107      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
108                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
109                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
110                        << '\n';);
111    } else {
112      // Remove dead <FI> index
114      // 3: If all else fails, spill the register to memory.
115      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
117          SGPR,
119      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
120                        << printReg(SGPR, TRI) << '\n');
121    }
122  } else {
// A scratch SGPR was found; mark it live so later queries don't reuse it.
126    LiveUnits.addReg(ScratchSGPR);
127    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
128                      << printReg(ScratchSGPR, TRI) << '\n');
129  }
130}
131
132// We need to specially emit stack operations here because a different frame
133// register is used than in the rest of the function, as getFrameRegister would
134// use.
// Emits a single-dword VGPR store to the stack for prologue spilling.
// NOTE(review): rendered excerpt — the MBB/iterator/DebugLoc parameters and
// the MachinePointerInfo/getMachineMemOperand lines are missing here.
135static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
136                             const SIMachineFunctionInfo &FuncInfo,
137                             LiveRegUnits &LiveUnits, MachineFunction &MF,
140                             Register SpillReg, int FI, Register FrameReg,
141                             int64_t DwordOff = 0) {
// Opcode choice depends on whether flat scratch is in use.
142  unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
143                                            : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
144
145  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
148      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
149      FrameInfo.getObjectAlign(FI));
// Temporarily mark the spilled register live around the store; it is killed
// by the store unless it is a block live-in.
150  LiveUnits.addReg(SpillReg);
151  bool IsKill = !MBB.isLiveIn(SpillReg);
152  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
153                          DwordOff, MMO, nullptr, &LiveUnits);
154  if (IsKill)
155    LiveUnits.removeReg(SpillReg);
156}
157
// Counterpart of buildPrologSpill: reloads a single dword from the stack into
// SpillReg in the epilogue, choosing the scratch/buffer opcode by subtarget.
// NOTE(review): rendered excerpt — the MBB/iterator parameters and the
// MachinePointerInfo/getMachineMemOperand lines are missing here.
158static void buildEpilogRestore(const GCNSubtarget &ST,
159                               const SIRegisterInfo &TRI,
160                               const SIMachineFunctionInfo &FuncInfo,
161                               LiveRegUnits &LiveUnits, MachineFunction &MF,
164                               const DebugLoc &DL, Register SpillReg, int FI,
165                               Register FrameReg, int64_t DwordOff = 0) {
166  unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
167                                            : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
168
169  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
172      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
173      FrameInfo.getObjectAlign(FI));
// IsKill is false for a load: the destination is defined, not consumed.
174  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
175                          DwordOff, MMO, nullptr, &LiveUnits);
176}
177
// Materializes the Global Information Table (GIT) pointer into TargetReg:
// the high half comes from the amdgpu-git-ptr-high attribute if set
// (i.e. not the 0xffffffff sentinel), otherwise from S_GETPC_B64; the low
// half is copied from the SGPR carrying the GIT pointer argument.
// NOTE(review): rendered excerpt — the signature's first line and the MFI
// declaration are missing here.
179                        const DebugLoc &DL, const SIInstrInfo *TII,
180                        Register TargetReg) {
181  MachineFunction *MF = MBB.getParent();
183  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
184  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
185  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
186  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
187
188  if (MFI->getGITPtrHigh() != 0xffffffff) {
189    BuildMI(MBB, I, DL, SMovB32, TargetHi)
190        .addImm(MFI->getGITPtrHigh())
191        .addReg(TargetReg, RegState::ImplicitDefine);
192  } else {
193    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
194    BuildMI(MBB, I, DL, GetPC64, TargetReg);
195  }
// The incoming GIT-pointer-low SGPR must be a live-in to be read here.
196  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
197  MF->getRegInfo().addLiveIn(GitPtrLo);
198  MBB.addLiveIn(GitPtrLo);
199  BuildMI(MBB, I, DL, SMovB32, TargetLo)
200      .addReg(GitPtrLo);
201}
202
// Lazily initializes LiveUnits at the current insertion point: block live-ins
// for a prologue, or live-outs stepped backward across MBBI for an epilogue.
// A non-empty LiveUnits is left untouched.
// NOTE(review): rendered excerpt — one signature line (the MF/MBB parameters)
// is missing here.
203static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
204                          const SIMachineFunctionInfo *FuncInfo,
206                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
207  if (LiveUnits.empty()) {
208    LiveUnits.init(TRI);
209    if (IsProlog) {
210      LiveUnits.addLiveIns(MBB);
211    } else {
212      // In epilog.
213      LiveUnits.addLiveOuts(MBB);
214      LiveUnits.stepBackward(*MBBI);
215    }
216  }
217}
218
219namespace llvm {
220
221// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
222// BP, etc. These spills are delayed until the current function's frame is
223// finalized. For a given register, the builder uses the
224// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
// NOTE(review): rendered excerpt — the class head, the MI/MBB/SI member
// declarations, several statements (e.g. the TmpVGPR declarations preceding
// the findScratchNonCalleeSaveRegister calls), and the case labels of the
// save()/restore() switches are missing here.
228  MachineFunction &MF;
229  const GCNSubtarget &ST;
230  MachineFrameInfo &MFI;
231  SIMachineFunctionInfo *FuncInfo;
232  const SIInstrInfo *TII;
233  const SIRegisterInfo &TRI;
234  Register SuperReg;
236  LiveRegUnits &LiveUnits;
237  const DebugLoc &DL;
238  Register FrameReg;
239  ArrayRef<int16_t> SplitParts;
240  unsigned NumSubRegs;
// Each SGPR is spilled in 4-byte (one dword) elements.
241  unsigned EltSize = 4;
242
// Spills SuperReg to the stack object FI, one dword at a time, bouncing each
// sub-register through a scratch VGPR.
243  void saveToMemory(const int FI) const {
244    MachineRegisterInfo &MRI = MF.getRegInfo();
245    assert(!MFI.isDeadObjectIndex(FI));
246
247    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
248
250        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
251    if (!TmpVGPR)
252      report_fatal_error("failed to find free scratch register");
253
254    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
255      Register SubReg = NumSubRegs == 1
256                            ? SuperReg
257                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
258      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
259          .addReg(SubReg);
260
261      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
262                       FI, FrameReg, DwordOff);
263      DwordOff += 4;
264    }
265  }
266
// Writes each sub-register of SuperReg into its pre-allocated physical VGPR
// lane recorded for FI.
267  void saveToVGPRLane(const int FI) const {
268    assert(!MFI.isDeadObjectIndex(FI));
269
270    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
272        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
273    assert(Spill.size() == NumSubRegs);
274
275    for (unsigned I = 0; I < NumSubRegs; ++I) {
276      Register SubReg = NumSubRegs == 1
277                            ? SuperReg
278                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
279      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
280              Spill[I].VGPR)
281          .addReg(SubReg)
282          .addImm(Spill[I].Lane)
283          .addReg(Spill[I].VGPR, RegState::Undef);
284    }
285  }
286
// Saves SuperReg by copying it into a free scratch SGPR.
287  void copyToScratchSGPR(Register DstReg) const {
288    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
289        .addReg(SuperReg)
291  }
292
// Reverse of saveToMemory: reload each dword into a scratch VGPR and read it
// back into the SGPR sub-register with v_readfirstlane.
293  void restoreFromMemory(const int FI) {
294    MachineRegisterInfo &MRI = MF.getRegInfo();
295
296    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
298        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
299    if (!TmpVGPR)
300      report_fatal_error("failed to find free scratch register");
301
302    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
303      Register SubReg = NumSubRegs == 1
304                            ? SuperReg
305                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
306
307      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
308                         TmpVGPR, FI, FrameReg, DwordOff);
309      assert(SubReg.isPhysical());
310
311      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
312          .addReg(TmpVGPR, RegState::Kill);
313      DwordOff += 4;
314    }
315  }
316
// Reverse of saveToVGPRLane: read each sub-register back out of its VGPR lane.
317  void restoreFromVGPRLane(const int FI) {
318    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
320        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
321    assert(Spill.size() == NumSubRegs);
322
323    for (unsigned I = 0; I < NumSubRegs; ++I) {
324      Register SubReg = NumSubRegs == 1
325                            ? SuperReg
326                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
327      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
328          .addReg(Spill[I].VGPR)
329          .addImm(Spill[I].Lane);
330    }
331  }
332
// Restores SuperReg from the scratch SGPR it was copied into.
333  void copyFromScratchSGPR(Register SrcReg) const {
334    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
335        .addReg(SrcReg)
337  }
338
339public:
344                                 const DebugLoc &DL, const SIInstrInfo *TII,
345                                 const SIRegisterInfo &TRI,
346                                 LiveRegUnits &LiveUnits, Register FrameReg)
347      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
348        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
349        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
350        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
351        FrameReg(FrameReg) {
// Split the (possibly multi-dword) register into 32-bit pieces.
352    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
353    SplitParts = TRI.getRegSplitParts(RC, EltSize);
354    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
355
356    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
357  }
358
// Dispatches on the recorded spill method for this register.
359  void save() {
360    switch (SI.getKind()) {
362      return saveToMemory(SI.getIndex());
364      return saveToVGPRLane(SI.getIndex());
366      return copyToScratchSGPR(SI.getReg());
367    }
368  }
369
370  void restore() {
371    switch (SI.getKind()) {
373      return restoreFromMemory(SI.getIndex());
375      return restoreFromVGPRLane(SI.getIndex());
377      return copyFromScratchSGPR(SI.getReg());
378    }
379  }
380};
381
382} // namespace llvm
383
384// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
// Initializes the FLAT_SCR register pair (or the flat-scratch hwregs on
// GFX10+) for an entry function. On PAL the flat scratch base is loaded from
// the scratch descriptor in the GIT; otherwise it comes from the preloaded
// flat-scratch-init user SGPR pair.
// NOTE(review): rendered excerpt — the signature's MF/MBB/iterator line, the
// MMO flag arguments, the Offset initializer, and the getPreloadedValue line
// are missing here.
385void SIFrameLowering::emitEntryFunctionFlatScratchInit(
387    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
388  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
389  const SIInstrInfo *TII = ST.getInstrInfo();
390  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
391  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
392
393  // We don't need this if we only have spills since there is no user facing
394  // scratch.
395
396  // TODO: If we know we don't have flat instructions earlier, we can omit
397  // this from the input registers.
398  //
399  // TODO: We only need to know if we access scratch space through a flat
400  // pointer. Because we only detect if flat instructions are used at all,
401  // this will be used more often than necessary on VI.
402
403  Register FlatScrInitLo;
404  Register FlatScrInitHi;
405
406  if (ST.isAmdPalOS()) {
407    // Extract the scratch offset from the descriptor in the GIT
408    LiveRegUnits LiveUnits;
409    LiveUnits.init(*TRI);
410    LiveUnits.addLiveIns(MBB);
411
412    // Find unused reg to load flat scratch init into
413    MachineRegisterInfo &MRI = MF.getRegInfo();
414    Register FlatScrInit = AMDGPU::NoRegister;
415    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
// Skip past the SGPR pairs occupied by preloaded arguments.
416    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
417    AllSGPR64s = AllSGPR64s.slice(
418        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
419    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
420    for (MCPhysReg Reg : AllSGPR64s) {
421      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
422          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
423        FlatScrInit = Reg;
424        break;
425      }
426    }
427    assert(FlatScrInit && "Failed to find free register for scratch init");
428
429    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
430    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
431
432    buildGitPtr(MBB, I, DL, TII, FlatScrInit);
433
434    // We now have the GIT ptr - now get the scratch descriptor from the entry
435    // at offset 0 (or offset 16 for a compute shader).
436    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
437    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
438    auto *MMO = MF.getMachineMemOperand(
439        PtrInfo,
442        8, Align(4));
443    unsigned Offset =
445    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
446    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
447    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
448        .addReg(FlatScrInit)
449        .addImm(EncodedOffset) // offset
450        .addImm(0) // cpol
451        .addMemOperand(MMO);
452
453    // Mask the offset in [47:0] of the descriptor
454    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
455    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
456        .addReg(FlatScrInitHi)
457        .addImm(0xffff);
458    And->getOperand(3).setIsDead(); // Mark SCC as dead.
459  } else {
460    Register FlatScratchInitReg =
462    assert(FlatScratchInitReg);
463
464    MachineRegisterInfo &MRI = MF.getRegInfo();
465    MRI.addLiveIn(FlatScratchInitReg);
466    MBB.addLiveIn(FlatScratchInitReg);
467
468    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
469    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
470  }
471
472  // Do a 64-bit pointer add.
473  if (ST.flatScratchIsPointer()) {
474    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
475      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
476          .addReg(FlatScrInitLo)
477          .addReg(ScratchWaveOffsetReg);
478      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
479                          FlatScrInitHi)
480          .addReg(FlatScrInitHi)
481          .addImm(0);
482      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
483
// On GFX10+ FLAT_SCR is not a real register pair; write via s_setreg.
484      using namespace AMDGPU::Hwreg;
485      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
486          .addReg(FlatScrInitLo)
487          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
488      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
489          .addReg(FlatScrInitHi)
490          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
491      return;
492    }
493
494    // For GFX9.
495    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
496        .addReg(FlatScrInitLo)
497        .addReg(ScratchWaveOffsetReg);
498    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
499                        AMDGPU::FLAT_SCR_HI)
500        .addReg(FlatScrInitHi)
501        .addImm(0);
502    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
503
504    return;
505  }
506
507  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
508
509  // Copy the size in bytes.
510  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
511      .addReg(FlatScrInitHi, RegState::Kill);
512
513  // Add wave offset in bytes to private base offset.
514  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
515  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
516      .addReg(FlatScrInitLo)
517      .addReg(ScratchWaveOffsetReg);
518
519  // Convert offset to 256-byte units.
520  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
521                      AMDGPU::FLAT_SCR_HI)
522      .addReg(FlatScrInitLo, RegState::Kill)
523      .addImm(8);
524  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
525}
526
527// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
528// memory. They should have been removed by now.
// Returns true iff every stack object in the frame is dead.
// NOTE(review): rendered excerpt — the signature line (presumably taking the
// MachineFrameInfo as MFI) is missing here.
530  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
531       I != E; ++I) {
532    if (!MFI.isDeadObjectIndex(I))
533      return false;
534  }
535
536  return true;
537}
538
539// Shift down registers reserved for the scratch RSRC.
// Returns the (possibly re-assigned) SGPR128 holding the scratch resource
// descriptor for an entry function, or a null Register when it is unused.
// NOTE(review): rendered excerpt — the second half of the early-exit
// condition and one statement in the replacement loop are missing here.
540Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
541    MachineFunction &MF) const {
542
543  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
544  const SIInstrInfo *TII = ST.getInstrInfo();
545  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
546  MachineRegisterInfo &MRI = MF.getRegInfo();
547  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
548
549  assert(MFI->isEntryFunction());
550
551  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
552
553  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
555    return Register();
556
// With the SGPR init bug, or when a non-default register was chosen, keep it.
557  if (ST.hasSGPRInitBug() ||
558      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
559    return ScratchRsrcReg;
560
561  // We reserved the last registers for this. Shift it down to the end of those
562  // which were actually used.
563  //
564  // FIXME: It might be safer to use a pseudoregister before replacement.
565
566  // FIXME: We should be able to eliminate unused input registers. We only
567  // cannot do this for the resources required for scratch access. For now we
568  // skip over user SGPRs and may leave unused holes.
569
// Round preloaded SGPR count up to SGPR128 granularity (4 dwords).
570  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
571  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
572  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
573
574  // Skip the last N reserved elements because they should have already been
575  // reserved for VCC etc.
576  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
577  for (MCPhysReg Reg : AllSGPR128s) {
578    // Pick the first unallocated one. Make sure we don't clobber the other
579    // reserved input we needed. Also for PAL, make sure we don't clobber
580    // the GIT pointer passed in SGPR0 or SGPR8.
581    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
582        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
583      MRI.replaceRegWith(ScratchRsrcReg, Reg);
585      MRI.reserveReg(Reg, TRI);
586      return Reg;
587    }
588  }
589
590  return ScratchRsrcReg;
591}
592
593static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
594 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
595}
596
// Entry-function prologue: pins down the scratch RSRC register, locates the
// preloaded scratch wave offset, initializes SP/FP, flat scratch, and the
// scratch descriptor, and (when supported) enables XNACK replay mode.
// NOTE(review): rendered excerpt — the function's first signature line, the
// MRI/MFI/I declarations, the getPreloadedValue arguments, the FPReg/SPReg
// declarations, and a few other lines are missing here.
598                                                MachineBasicBlock &MBB) const {
599  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
600
601  // FIXME: If we only have SGPR spills, we won't actually be using scratch
602  // memory since these spill to VGPRs. We should be cleaning up these unused
603  // SGPR spill frame indices somewhere.
604
605  // FIXME: We still have implicit uses on SGPR spill instructions in case they
606  // need to spill to vector memory. It's likely that will not happen, but at
607  // this point it appears we need the setup. This part of the prolog should be
608  // emitted after frame indices are eliminated.
609
610  // FIXME: Remove all of the isPhysRegUsed checks
611
613  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
614  const SIInstrInfo *TII = ST.getInstrInfo();
615  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
617  const Function &F = MF.getFunction();
618  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
619
620  assert(MFI->isEntryFunction());
621
622  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
624
625  // We need to do the replacement of the private segment buffer register even
626  // if there are no stack objects. There could be stores to undef or a
627  // constant without an associated object.
628  //
629  // This will return `Register()` in cases where there are no actual
630  // uses of the SRSRC.
631  Register ScratchRsrcReg;
632  if (!ST.hasFlatScratchEnabled())
633    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
634
635  // Make the selected register live throughout the function.
636  if (ScratchRsrcReg) {
637    for (MachineBasicBlock &OtherBB : MF) {
638      if (&OtherBB != &MBB) {
639        OtherBB.addLiveIn(ScratchRsrcReg);
640      }
641    }
642  }
643
644  // Now that we have fixed the reserved SRSRC we need to locate the
645  // (potentially) preloaded SRSRC.
646  Register PreloadedScratchRsrcReg;
647  if (ST.isAmdHsaOrMesa(F)) {
648    PreloadedScratchRsrcReg =
650    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
651      // We added live-ins during argument lowering, but since they were not
652      // used they were deleted. We're adding the uses now, so add them back.
653      MRI.addLiveIn(PreloadedScratchRsrcReg);
654      MBB.addLiveIn(PreloadedScratchRsrcReg);
655    }
656  }
657
658  // Debug location must be unknown since the first debug location is used to
659  // determine the end of the prologue.
660  DebugLoc DL;
662
663  // We found the SRSRC first because it needs four registers and has an
664  // alignment requirement. If the SRSRC that we found is clobbering with
665  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
666  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
667  // wave offset to a free SGPR.
668  Register ScratchWaveOffsetReg;
669  if (PreloadedScratchWaveOffsetReg &&
670      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
671    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
672    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
673    AllSGPRs = AllSGPRs.slice(
674        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
675    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
676    for (MCPhysReg Reg : AllSGPRs) {
677      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
678          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
679        ScratchWaveOffsetReg = Reg;
680        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
681            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
682        break;
683      }
684    }
685
686    // FIXME: We can spill incoming arguments and restore at the end of the
687    // prolog.
688    if (!ScratchWaveOffsetReg)
690          "could not find temporary scratch offset register in prolog");
691  } else {
692    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
693  }
694  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
695
// Per-wave scratch size: frame size scaled when not using flat scratch.
696  unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
697  if (!mayReserveScratchForCWSR(MF)) {
698    if (hasFP(MF)) {
700      assert(FPReg != AMDGPU::FP_REG);
701      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
702    }
703
706      assert(SPReg != AMDGPU::SP_REG);
707      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
708    }
709  } else {
710    // We need to check if we're on a compute queue - if we are, then the CWSR
711    // trap handler may need to store some VGPRs on the stack. The first VGPR
712    // block is saved separately, so we only need to allocate space for any
713    // additional VGPR blocks used. For now, we will make sure there's enough
714    // room for the theoretical maximum number of VGPRs that can be allocated.
715    // FIXME: Figure out if the shader uses fewer VGPRs in practice.
716    assert(hasFP(MF));
718    assert(FPReg != AMDGPU::FP_REG);
719    unsigned VGPRSize = llvm::alignTo(
720        (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
722             MFI->getDynamicVGPRBlockSize())) *
723            4,
724        FrameInfo.getMaxAlign());
726
727    BuildMI(MBB, I, DL, TII->get(AMDGPU::GET_STACK_BASE), FPReg);
730      assert(SPReg != AMDGPU::SP_REG);
731
732      // If at least one of the constants can be inlined, then we can use
733      // s_cselect. Otherwise, use a mov and cmovk.
734      if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
736                                       ST.hasInv2PiInlineImm())) {
737        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
738            .addImm(Offset + VGPRSize)
739            .addImm(Offset);
740      } else {
741        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
742        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
743            .addImm(Offset + VGPRSize);
744      }
745    }
746  }
747
748  bool NeedsFlatScratchInit =
750      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
751       (!allStackObjectsAreDead(FrameInfo) && ST.hasFlatScratchEnabled()));
752
753  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
754      PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
755    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
756    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
757  }
758
759  if (NeedsFlatScratchInit) {
760    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
761  }
762
763  if (ScratchRsrcReg) {
764    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
765                                         PreloadedScratchRsrcReg,
766                                         ScratchRsrcReg, ScratchWaveOffsetReg);
767  }
768
769  if (ST.hasWaitXcnt()) {
770    // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
771    // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
772    // insertion logic, which assumes multi-group mode by default.
773    unsigned RegEncoding =
775    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
776        .addImm(1)
777        .addImm(RegEncoding);
778  }
779}
780
781// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
// Builds the 128-bit scratch resource descriptor: loaded from the GIT on PAL,
// assembled from relocations/constants for Mesa GFX shaders or when no
// preloaded RSRC exists, otherwise copied from the preloaded RSRC; finally the
// scratch wave offset is added into the 48-bit base address.
// NOTE(review): rendered excerpt — the signature's MF/MBB/iterator line, the
// PtrInfo declarations, MMO flag arguments, and several branch-internal lines
// are missing here.
782void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
784    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
785    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
786
787  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
788  const SIInstrInfo *TII = ST.getInstrInfo();
789  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
791  const Function &Fn = MF.getFunction();
792
793  if (ST.isAmdPalOS()) {
794    // The pointer to the GIT is formed from the offset passed in and either
795    // the amdgpu-git-ptr-high function attribute or the top part of the PC
796    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
797    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
798
799    buildGitPtr(MBB, I, DL, TII, Rsrc01);
800
801    // We now have the GIT ptr - now get the scratch descriptor from the entry
802    // at offset 0 (or offset 16 for a compute shader).
804    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
805    auto *MMO = MF.getMachineMemOperand(
806        PtrInfo,
809        16, Align(4));
810    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
811    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
812    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
813    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
814        .addReg(Rsrc01)
815        .addImm(EncodedOffset) // offset
816        .addImm(0) // cpol
817        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
818        .addMemOperand(MMO);
819
820    // The driver will always set the SRD for wave 64 (bits 118:117 of
821    // descriptor / bits 22:21 of third sub-reg will be 0b11)
822    // If the shader is actually wave32 we have to modify the const_index_stride
823    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
824    // reason the driver does this is that there can be cases where it presents
825    // 2 shaders with different wave size (e.g. VsFs).
826    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
827    if (ST.isWave32()) {
828      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
829      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
830          .addImm(21)
831          .addReg(Rsrc03);
832    }
833  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
834    assert(!ST.isAmdHsaOrMesa(Fn));
835    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
836
837    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
838    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
839
840    // Use relocations to get the pointer, and setup the other bits manually.
841    uint64_t Rsrc23 = TII->getScratchRsrcWords23();
842
844      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
845
847        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
848
849        BuildMI(MBB, I, DL, Mov64, Rsrc01)
851            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
852      } else {
853        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
854
855        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
856        auto *MMO = MF.getMachineMemOperand(
857            PtrInfo,
860            8, Align(4));
861        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
863            .addImm(0) // offset
864            .addImm(0) // cpol
865            .addMemOperand(MMO)
866            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
867
870      }
871    } else {
872      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
873      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
874
875      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
876          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
877          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
878
879      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
880          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
881          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
882    }
883
884    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
885        .addImm(Lo_32(Rsrc23))
886        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
887
888    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
889        .addImm(Hi_32(Rsrc23))
890        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
891  } else if (ST.isAmdHsaOrMesa(Fn)) {
892    assert(PreloadedScratchRsrcReg);
893
894    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
895      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
896          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
897    }
898  }
899
900  // Add the scratch wave offset into the scratch RSRC.
901  //
902  // We only want to update the first 48 bits, which is the base address
903  // pointer, without touching the adjacent 16 bits of flags. We know this add
904  // cannot carry-out from bit 47, otherwise the scratch allocation would be
905  // impossible to fit in the 48-bit global address space.
906  //
907  // TODO: Evaluate if it is better to just construct an SRD using the flat
908  // scratch init and some constants rather than update the one we are passed.
909  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
910  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
911
912  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
913  // the kernel body via inreg arguments.
914  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
915      .addReg(ScratchRsrcSub0)
916      .addReg(ScratchWaveOffsetReg)
917      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
918  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
919      .addReg(ScratchRsrcSub1)
920      .addImm(0)
921      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
922  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
923}
924
// Classifies a TargetStackID::Value as supported or not by this frame
// lowering, with an unreachable default for invalid enumerators.
// NOTE(review): rendered excerpt — the function signature and the case labels
// selecting the true/false returns are missing here.
926  switch (ID) {
930    return true;
934    return false;
935  }
936  llvm_unreachable("Invalid TargetStackID::Value");
937}
938
939// Activate only the inactive lanes when \p EnableInactiveLanes is true.
940// Otherwise, activate all lanes. It returns the saved exec.
// NOTE(review): rendered excerpt — the function name line and the
// FuncInfo/MRI declarations are missing here.
942                                     MachineFunction &MF,
945                                     const DebugLoc &DL, bool IsProlog,
946                                     bool EnableInactiveLanes) {
947  Register ScratchExecCopy;
949  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
950  const SIInstrInfo *TII = ST.getInstrInfo();
951  const SIRegisterInfo &TRI = TII->getRegisterInfo();
953
954  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
955
956  if (FuncInfo->isWholeWaveFunction()) {
957    // Whole wave functions already have a copy of the original EXEC mask that
958    // we can use.
959    assert(IsProlog && "Epilog should look at return, not setup");
960    ScratchExecCopy =
961        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
962    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
963  } else {
964    ScratchExecCopy = findScratchNonCalleeSaveRegister(
965        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
966  }
967
968  if (!ScratchExecCopy)
969    report_fatal_error("failed to find free scratch register");
970
971  LiveUnits.addReg(ScratchExecCopy);
972
// XOR_SAVEEXEC flips to the inactive lanes; OR_SAVEEXEC with -1 enables all
// lanes. The 32/64-bit form follows the wave size.
973  const unsigned SaveExecOpc =
974      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
975                                           : AMDGPU::S_OR_SAVEEXEC_B32)
976                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
977                                           : AMDGPU::S_OR_SAVEEXEC_B64);
978  auto SaveExec =
979      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
980  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
981
982  return ScratchExecCopy;
983}
984
// NOTE(review): the leading signature lines (source lines 985-987, containing
// the function name and the MF/MBB/MBBI/DL/LiveUnits parameters) are missing
// from this extract. Emits the prolog-side callee-save spill stores (WWM VGPRs
// and prolog/epilog SGPR spills) relative to \p FrameReg.
988 Register FrameReg, Register FramePtrRegScratchCopy) const {
990 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
991 const SIInstrInfo *TII = ST.getInstrInfo();
992 const SIRegisterInfo &TRI = TII->getRegisterInfo();
995
996 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
997 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
998 // might end up flipping the EXEC bits twice.
999 Register ScratchExecCopy;
1000 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1001 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
// For non-CSR WWM scratch regs only the inactive lanes need saving, so flip
// EXEC to the inactive lanes first (EnableInactiveLanes=true).
1002 if (!WWMScratchRegs.empty())
1003 ScratchExecCopy =
1004 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1005 /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
1006
// Helper: store each (VGPR, frame-index) pair to its stack slot.
// NOTE(review): the lambda's parameter list (source line 1008) is missing
// from this extract.
1007 auto StoreWWMRegisters =
1009 for (const auto &Reg : WWMRegs) {
1010 Register VGPR = Reg.first;
1011 int FI = Reg.second;
1012 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1013 VGPR, FI, FrameReg);
1014 }
1015 };
1016
// Make the WWM scratch registers live-in so their incoming (inactive-lane)
// values are not considered undefined.
1017 for (const Register Reg : make_first_range(WWMScratchRegs)) {
1018 if (!MRI.isReserved(Reg)) {
1019 MRI.addLiveIn(Reg);
1020 MBB.addLiveIn(Reg);
1021 }
1022 }
1023 StoreWWMRegisters(WWMScratchRegs);
1024
1025 auto EnableAllLanes = [&]() {
1026 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1027 };
1028
// Callee-saved WWM VGPRs need all lanes saved: either widen EXEC to -1 (if
// we already saved it) or save it now without the inactive-lane flip.
1029 if (!WWMCalleeSavedRegs.empty()) {
1030 if (ScratchExecCopy) {
1031 EnableAllLanes();
1032 } else {
1033 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1034 /*IsProlog*/ true,
1035 /*EnableInactiveLanes*/ false);
1036 }
1037 }
1038
1039 StoreWWMRegisters(WWMCalleeSavedRegs);
1040 if (FuncInfo->isWholeWaveFunction()) {
1041 // If we have already saved some WWM CSR registers, then the EXEC is already
1042 // -1 and we don't need to do anything else. Otherwise, set EXEC to -1 here.
1043 if (!ScratchExecCopy)
1044 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1045 /*EnableInactiveLanes*/ true);
1046 else if (WWMCalleeSavedRegs.empty())
1047 EnableAllLanes();
1048 } else if (ScratchExecCopy) {
1049 // FIXME: Split block and make terminator.
// Restore the original EXEC mask now that all WWM stores are done.
1050 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1051 .addReg(ScratchExecCopy, RegState::Kill);
1052 LiveUnits.addReg(ScratchExecCopy);
1053 }
1054
1055 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1056
1057 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1058 // Special handle FP spill:
1059 // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
1060 // Otherwise, FP has been moved to a temporary register and spill it
1061 // instead.
1062 Register Reg =
1063 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1064 if (!Reg)
1065 continue;
1066
1067 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1068 LiveUnits, FrameReg);
1069 SB.save();
1070 }
1071
1072 // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1073 // such scratch registers live throughout the function.
1074 SmallVector<Register, 1> ScratchSGPRs;
1075 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1076 if (!ScratchSGPRs.empty()) {
1077 for (MachineBasicBlock &MBB : MF) {
1078 for (MCPhysReg Reg : ScratchSGPRs)
1079 MBB.addLiveIn(Reg);
1080
1081 MBB.sortUniqueLiveIns();
1082 }
1083 if (!LiveUnits.empty()) {
1084 for (MCPhysReg Reg : ScratchSGPRs)
1085 LiveUnits.addReg(Reg);
1086 }
1087 }
1088}
1089
// NOTE(review): the leading signature lines (source lines 1090-1092, with the
// function name and MF/MBB/MBBI/DL/LiveUnits parameters) are missing from this
// extract. Mirror of emitCSRSpillStores: reloads prolog/epilog SGPR spills and
// WWM VGPRs in the epilog, using \p FrameReg as the base register.
1093 Register FrameReg, Register FramePtrRegScratchCopy) const {
1094 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1095 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1096 const SIInstrInfo *TII = ST.getInstrInfo();
1097 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1099 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1100
1101 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1102 // Special handle FP restore:
1103 // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1104 // the FP value to a temporary register. The frame pointer should be
1105 // overwritten only at the end when all other spills are restored from
1106 // current frame.
1107 Register Reg =
1108 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1109 if (!Reg)
1110 continue;
1111
1112 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1113 LiveUnits, FrameReg);
1114 SB.restore();
1115 }
1116
1117 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1118 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1119 // this, we might end up flipping the EXEC bits twice.
1120 Register ScratchExecCopy;
1121 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1122 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
// Helper: reload each (VGPR, frame-index) pair from its stack slot.
// NOTE(review): the lambda's parameter list (source line 1124) is missing
// from this extract.
1123 auto RestoreWWMRegisters =
1125 for (const auto &Reg : WWMRegs) {
1126 Register VGPR = Reg.first;
1127 int FI = Reg.second;
1128 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1129 VGPR, FI, FrameReg);
1130 }
1131 };
1132
1133 if (FuncInfo->isWholeWaveFunction()) {
1134 // For whole wave functions, the EXEC is already -1 at this point.
1135 // Therefore, we can restore the CSR WWM registers right away.
1136 RestoreWWMRegisters(WWMCalleeSavedRegs);
1137
1138 // The original EXEC is the first operand of the return instruction.
1139 MachineInstr &Return = MBB.instr_back();
1140 unsigned Opcode = Return.getOpcode();
1141 switch (Opcode) {
1142 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1143 Opcode = AMDGPU::SI_RETURN;
1144 break;
1145 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1146 Opcode = AMDGPU::SI_TCRETURN_GFX;
1147 break;
1148 default:
1149 llvm_unreachable("Unexpected return inst");
1150 }
1151 Register OrigExec = Return.getOperand(0).getReg();
1152
// EXEC := ~OrigExec selects exactly the inactive lanes so the scratch WWM
// registers' inactive-lane contents can be reloaded.
1153 if (!WWMScratchRegs.empty()) {
1154 BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
1155 .addReg(OrigExec)
1156 .addImm(-1);
1157 RestoreWWMRegisters(WWMScratchRegs);
1158 }
1159
1160 // Restore original EXEC.
1161 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);
1162
1163 // Drop the first operand and update the opcode.
1164 Return.removeOperand(0);
1165 Return.setDesc(TII->get(Opcode));
1166
1167 return;
1168 }
1169
// Non-whole-wave path: flip to the inactive lanes, reload scratch WWM regs,
// then (with all lanes on) reload the callee-saved WWM regs.
1170 if (!WWMScratchRegs.empty()) {
1171 ScratchExecCopy =
1172 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1173 /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1174 }
1175 RestoreWWMRegisters(WWMScratchRegs);
1176 if (!WWMCalleeSavedRegs.empty()) {
1177 if (ScratchExecCopy) {
1178 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1179 } else {
1180 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1181 /*IsProlog*/ false,
1182 /*EnableInactiveLanes*/ false);
1183 }
1184 }
1185
1186 RestoreWWMRegisters(WWMCalleeSavedRegs);
1187 if (ScratchExecCopy) {
1188 // FIXME: Split block and make terminator.
// Put the original EXEC mask back.
1189 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1190 .addReg(ScratchExecCopy, RegState::Kill);
1191 }
1192}
1193
// NOTE(review): the signature line (source line 1194, function name and the
// MachineFunction parameter) is missing from this extract. Emits the function
// prologue for non-entry functions: CSR spills, stack realignment, FP/BP
// setup, and the SP increment for the local frame.
1195 MachineBasicBlock &MBB) const {
// Entry functions have a separate prologue path (emitted elsewhere).
1197 if (FuncInfo->isEntryFunction()) {
1199 return;
1200 }
1201
1202 MachineFrameInfo &MFI = MF.getFrameInfo();
1203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1204 const SIInstrInfo *TII = ST.getInstrInfo();
1205 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1206 MachineRegisterInfo &MRI = MF.getRegInfo();
1207
1208 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1209 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1210 Register BasePtrReg =
1211 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1212 LiveRegUnits LiveUnits;
1213
1215 // DebugLoc must be unknown since the first instruction with DebugLoc is used
1216 // to determine the end of the prologue.
1217 DebugLoc DL;
1218
1219 bool HasFP = false;
1220 bool HasBP = false;
1221 uint32_t NumBytes = MFI.getStackSize();
1222 uint32_t RoundedSize = NumBytes;
1223
1224 // Chain functions never return, so there's no need to save and restore the FP
1225 // or BP.
1226 bool SavesStackRegs = !FuncInfo->isChainFunction();
1227
// Stack realignment forces use of a frame pointer.
1228 if (TRI.hasStackRealignment(MF))
1229 HasFP = true;
1230
1231 Register FramePtrRegScratchCopy;
1232 if (!HasFP && !hasFP(MF)) {
1233 // Emit the CSR spill stores with SP base register.
1234 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1235 FramePtrRegScratchCopy);
1236 } else if (SavesStackRegs) {
1237 // CSR spill stores will use FP as base register.
1238 Register SGPRForFPSaveRestoreCopy =
1239 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1240
1241 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1242 if (SGPRForFPSaveRestoreCopy) {
1243 // Copy FP to the scratch register now and emit the CFI entry. It avoids
1244 // the extra FP copy needed in the other two cases when FP is spilled to
1245 // memory or to a VGPR lane.
// NOTE(review): the builder's declaration line (source line 1246) is
// missing from this extract; SB below is a PrologEpilogSGPRSpillBuilder.
1247 FramePtrReg,
1248 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1249 DL, TII, TRI, LiveUnits, FramePtrReg);
1250 SB.save();
1251 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1252 } else {
1253 // Copy FP into a new scratch register so that its previous value can be
1254 // spilled after setting up the new frame.
1255 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1256 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1257 if (!FramePtrRegScratchCopy)
1258 report_fatal_error("failed to find free scratch register");
1259
1260 LiveUnits.addReg(FramePtrRegScratchCopy);
1261 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1262 .addReg(FramePtrReg);
1263 }
1264 }
1265
// Realignment: over-allocate by the max alignment and round FP up to an
// aligned boundary with an add/and pair.
1266 if (HasFP) {
1267 const unsigned Alignment = MFI.getMaxAlign().value();
1268
1269 RoundedSize += Alignment;
1270 if (LiveUnits.empty()) {
1271 LiveUnits.init(TRI);
1272 LiveUnits.addLiveIns(MBB);
1273 }
1274
1275 // s_add_i32 s33, s32, NumBytes
1276 // s_and_b32 s33, s33, 0b111...0000
1277 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1278 .addReg(StackPtrReg)
1279 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1281 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1282 .addReg(FramePtrReg, RegState::Kill)
1283 .addImm(-Alignment * getScratchScaleFactor(ST))
1285 And->getOperand(3).setIsDead(); // Mark SCC as dead.
1286 FuncInfo->setIsStackRealigned(true);
1287 } else if ((HasFP = hasFP(MF))) {
// No realignment needed: FP is simply the incoming SP.
1288 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1289 .addReg(StackPtrReg)
1291 }
1292
1293 // If FP is used, emit the CSR spills with FP base register.
1294 if (HasFP) {
1295 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1296 FramePtrRegScratchCopy);
1297 if (FramePtrRegScratchCopy)
1298 LiveUnits.removeReg(FramePtrRegScratchCopy);
1299 }
1300
1301 // If we need a base pointer, set it up here. It's whatever the value of
1302 // the stack pointer is at this point. Any variable size objects will be
1303 // allocated after this, so we can still use the base pointer to reference
1304 // the incoming arguments.
1305 if ((HasBP = TRI.hasBasePointer(MF))) {
1306 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1307 .addReg(StackPtrReg)
1309 }
1310
// Bump SP past the (possibly alignment-padded) local frame. The scale factor
// converts bytes to the SP's per-lane scratch units.
1311 if (HasFP && RoundedSize != 0) {
1312 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1313 .addReg(StackPtrReg)
1314 .addImm(RoundedSize * getScratchScaleFactor(ST))
1316 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1317 }
1318
// Consistency checks: FP/BP must be saved iff they are actually needed
// (modulo chain functions and the VGPR-to-AGPR spill special case).
1319 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1320 (void)FPSaved;
1321 assert((!HasFP || FPSaved || !SavesStackRegs) &&
1322 "Needed to save FP but didn't save it anywhere");
1323
1324 // If we allow spilling to AGPRs we may have saved FP but then spill
1325 // everything into AGPRs instead of the stack.
1326 assert((HasFP || !FPSaved || !SavesStackRegs || EnableSpillVGPRToAGPR) &&
1327 "Saved FP but didn't need it");
1328
1329 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1330 (void)BPSaved;
1331 assert((!HasBP || BPSaved || !SavesStackRegs) &&
1332 "Needed to save BP but didn't save it anywhere");
1333
1334 assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1335
1336 if (FuncInfo->isWholeWaveFunction()) {
1337 // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
1338 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1339 }
1340}
1341
// NOTE(review): the signature line (source line 1342, function name and the
// MachineFunction parameter) is missing from this extract. Emits the epilogue
// for non-entry functions: restores SP, CSRs, and finally the frame pointer.
1343 MachineBasicBlock &MBB) const {
1344 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1345 if (FuncInfo->isEntryFunction())
1346 return;
1347
// Chain functions without a tail call never return, so no epilogue needed.
1348 const MachineFrameInfo &MFI = MF.getFrameInfo();
1349 if (FuncInfo->isChainFunction() && !MFI.hasTailCall())
1350 return;
1351
1352 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1353 const SIInstrInfo *TII = ST.getInstrInfo();
1354 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1355 MachineRegisterInfo &MRI = MF.getRegInfo();
1356 LiveRegUnits LiveUnits;
1357 // Get the insert location for the epilogue. If there were no terminators in
1358 // the block, get the last instruction.
1360 DebugLoc DL;
1361 if (!MBB.empty()) {
1362 MBBI = MBB.getLastNonDebugInstr();
1363 if (MBBI != MBB.end())
1364 DL = MBBI->getDebugLoc();
1365
1366 MBBI = MBB.getFirstTerminator();
1367 }
1368
// Account for the extra alignment padding added in the prologue, if any.
1369 uint32_t NumBytes = MFI.getStackSize();
1370 uint32_t RoundedSize = FuncInfo->isStackRealigned()
1371 ? NumBytes + MFI.getMaxAlign().value()
1372 : NumBytes;
1373 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1374 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1375 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1376
// Rewind SP to its incoming value: prefer the base pointer (pre-variable-
// sized-object SP), else the frame pointer.
1377 if (RoundedSize != 0) {
1378 if (TRI.hasBasePointer(MF)) {
1379 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1380 .addReg(TRI.getBaseRegister())
1382 } else if (hasFP(MF)) {
1383 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1384 .addReg(FramePtrReg)
1386 }
1387 }
1388
1389 Register FramePtrRegScratchCopy;
1390 Register SGPRForFPSaveRestoreCopy =
1391 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1392 if (FPSaved) {
1393 // CSR spill restores should use FP as base register. If
1394 // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
1395 // into a new scratch register and copy to FP later when other registers are
1396 // restored from the current stack frame.
1397 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1398 if (SGPRForFPSaveRestoreCopy) {
1399 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1400 } else {
1401 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1402 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1403 if (!FramePtrRegScratchCopy)
1404 report_fatal_error("failed to find free scratch register");
1405
1406 LiveUnits.addReg(FramePtrRegScratchCopy);
1407 }
1408
1409 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1410 FramePtrRegScratchCopy);
1411 }
1412
1413 if (FPSaved) {
1414 // Insert the copy to restore FP.
1415 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1416 : FramePtrRegScratchCopy;
// NOTE(review): the builder variable's declaration (source line 1417) is
// missing from this extract.
1418 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1419 .addReg(SrcReg);
// NOTE(review): the statement guarded here (source line 1421) is missing
// from this extract.
1420 if (SGPRForFPSaveRestoreCopy)
1422 } else {
1423 // Insert the CSR spill restores with SP as the base register.
1424 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1425 FramePtrRegScratchCopy);
1426 }
1427}
1428
1429#ifndef NDEBUG
// NOTE(review): the signature line (source line 1430) is missing from this
// extract; from the callers below, this is an assert-only helper returning
// true iff every stack object in \p MFI (meeting a condition on source lines
// 1436-1437, also missing here) is dead.
1431 const MachineFrameInfo &MFI = MF.getFrameInfo();
1432 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1433 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1434 I != E; ++I) {
// A live object that matches the (not visible) condition disproves the claim.
1435 if (!MFI.isDeadObjectIndex(I) &&
1438 return false;
1439 }
1440 }
1441
1442 return true;
1443}
1444#endif
1445
// NOTE(review): the leading signature line (source line 1446, function name
// and MF parameter) and the return statement (source line 1452) are missing
// from this extract. Reports the register + offset through which frame index
// \p FI should be referenced; the frame register is taken from SIRegisterInfo.
1447 int FI,
1448 Register &FrameReg) const {
1449 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1450
// Out-parameter: the register callers must use as the access base.
1451 FrameReg = RI->getFrameRegister(MF);
1453}
1454
// NOTE(review): the first signature line (source line 1455, with the function
// name) is missing from this extract. Runs before frame finalization: folds
// VGPR spills into AGPRs where possible, drops dead spill slots, and reserves
// emergency scavenging slots.
1456 MachineFunction &MF,
1457 RegScavenger *RS) const {
1458 MachineFrameInfo &MFI = MF.getFrameInfo();
1459
1460 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1461 const SIInstrInfo *TII = ST.getInstrInfo();
1462 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1463 MachineRegisterInfo &MRI = MF.getRegInfo();
1465
// AGPR spilling requires MAI instructions; the condition's trailing clause
// (source line 1467) is missing from this extract.
1466 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1468
1469 if (SpillVGPRToAGPR) {
1470 // To track the spill frame indices handled in this pass.
1471 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1472 BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1473
1474 bool SeenDbgInstr = false;
1475
1476 for (MachineBasicBlock &MBB : MF) {
// NOTE(review): the inner instruction-loop header (source line 1477) is
// missing from this extract.
1478 int FrameIndex;
1479 if (MI.isDebugInstr())
1480 SeenDbgInstr = true;
1481
1482 if (TII->isVGPRSpill(MI)) {
1483 // Try to eliminate stack used by VGPR spills before frame
1484 // finalization.
1485 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1486 AMDGPU::OpName::vaddr);
1487 int FI = MI.getOperand(FIOp).getIndex();
1488 Register VReg =
1489 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1490 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1491 TRI->isAGPR(MRI, VReg))) {
// Rewrite the spill in place to target the allocated A/VGPR; the
// scavenger is positioned just past MI for the rewrite.
1492 assert(RS != nullptr);
1493 RS->enterBasicBlockEnd(MBB);
1494 RS->backward(std::next(MI.getIterator()));
1495 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1496 SpillFIs.set(FI);
1497 continue;
1498 }
1499 } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1500 TII->isLoadFromStackSlot(MI, FrameIndex))
1501 if (!MFI.isFixedObjectIndex(FrameIndex))
1502 NonVGPRSpillFIs.set(FrameIndex);
1503 }
1504 }
1505
1506 // Stack slot coloring may assign different objects to the same stack slot.
1507 // If not, then the VGPR to AGPR spill slot is dead.
1508 for (unsigned FI : SpillFIs.set_bits())
1509 if (!NonVGPRSpillFIs.test(FI))
1510 FuncInfo->setVGPRToAGPRSpillDead(FI);
1511
// The AGPRs/VGPRs now used for spilling carry values across the whole
// function, so mark them live-in everywhere.
1512 for (MachineBasicBlock &MBB : MF) {
1513 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1514 MBB.addLiveIn(Reg);
1515
1516 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1517 MBB.addLiveIn(Reg);
1518
1519 MBB.sortUniqueLiveIns();
1520
1521 if (!SpillFIs.empty() && SeenDbgInstr) {
1522 // FIXME: The dead frame indices are replaced with a null register from
1523 // the debug value instructions. We should instead, update it with the
1524 // correct register value. But not sure the register value alone is
1525 for (MachineInstr &MI : MBB) {
1526 if (MI.isDebugValue()) {
1527 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
1528 if (MI.getOperand(StackOperandIdx).isFI() &&
1529 !MFI.isFixedObjectIndex(
1530 MI.getOperand(StackOperandIdx).getIndex()) &&
1531 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
1532 MI.getOperand(StackOperandIdx)
1533 .ChangeToRegister(Register(), false /*isDef*/);
1534 }
1535 }
1536 }
1537 }
1538 }
1539 }
1540
1541 // At this point we've already allocated all spilled SGPRs to VGPRs if we
1542 // can. Any remaining SGPR spills will go to memory, so move them back to the
1543 // default stack.
1544 bool HaveSGPRToVMemSpill =
1545 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
// NOTE(review): the assert's condition line (source line 1546) is missing
// from this extract.
1547 "SGPR spill should have been removed in SILowerSGPRSpills");
1548
1549 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1550 // but currently hasNonSpillStackObjects is set only from source
1551 // allocas. Stack temps produced from legalization are not counted currently.
1552 if (!allStackObjectsAreDead(MFI)) {
1553 assert(RS && "RegScavenger required if spilling");
1554
1555 // Add an emergency spill slot
1556 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1557
1558 // If we are spilling SGPRs to memory with a large frame, we may need a
1559 // second VGPR emergency frame index.
// NOTE(review): the second conjunct of this condition (source line 1561)
// is missing from this extract.
1560 if (HaveSGPRToVMemSpill &&
1562 RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
1563 }
1564 }
1565}
1566
// NOTE(review): the first signature line (source line 1567, with the function
// name) is missing from this extract. Post-RA tuning pass hook: shifts the
// reserved AGPR-copy VGPR and the long-branch SGPR pair down to lower unused
// registers when possible, reducing register pressure at the top of the file.
1568 MachineFunction &MF, RegScavenger *RS) const {
1569 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1570 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1571 MachineRegisterInfo &MRI = MF.getRegInfo();
1573
1574 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1575 // On gfx908, we had initially reserved highest available VGPR for AGPR
1576 // copy. Now since we are done with RA, check if there exist an unused VGPR
1577 // which is lower than the eariler reserved VGPR before RA. If one exist,
1578 // use it for AGPR copy instead of one reserved before RA.
1579 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1580 Register UnusedLowVGPR =
1581 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1582 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1583 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1584 // Reserve this newly identified VGPR (for AGPR copy)
1585 // reserved registers should already be frozen at this point
1586 // so we can avoid calling MRI.freezeReservedRegs and just use
1587 // MRI.reserveReg
1588 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1589 MRI.reserveReg(UnusedLowVGPR, TRI);
1590 }
1591 }
1592 // We initally reserved the highest available SGPR pair for long branches
1593 // now, after RA, we shift down to a lower unused one if one exists
1594 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1595 Register UnusedLowSGPR =
1596 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1597 // If LongBranchReservedReg is null then we didn't find a long branch
1598 // and never reserved a register to begin with so there is nothing to
1599 // shift down. Then if UnusedLowSGPR is null, there isn't available lower
1600 // register to use so just keep the original one we set.
1601 if (LongBranchReservedReg && UnusedLowSGPR) {
1602 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1603 MRI.reserveReg(UnusedLowSGPR, TRI);
1604 }
1605}
1606
1607// The special SGPR spills like the one needed for FP, BP or any reserved
1608// registers delayed until frame lowering.
// NOTE(review): the first signature line (source line 1609, with the function
// name) is missing from this extract.
1610 MachineFunction &MF, BitVector &SavedVGPRs,
1611 bool NeedExecCopyReservedReg) const {
1612 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1613 MachineRegisterInfo &MRI = MF.getRegInfo();
1615 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1616 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1617 LiveRegUnits LiveUnits;
1618 LiveUnits.init(*TRI);
1619 // Initially mark callee saved registers as used so we will not choose them
1620 // while looking for scratch SGPRs.
1621 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1622 for (unsigned I = 0; CSRegs[I]; ++I)
1623 LiveUnits.addReg(CSRegs[I]);
1624
1625 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1626
// Decide the fate of the register reserved for the EXEC copy: keep it,
// replace it with a cheaper unused SGPR, spill it, or drop it entirely.
1627 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1628 if (NeedExecCopyReservedReg ||
1629 (ReservedRegForExecCopy &&
1630 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1631 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1632 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1633 if (UnusedScratchReg) {
1634 // If found any unused scratch SGPR, reserve the register itself for Exec
1635 // copy and there is no need for any spills in that case.
1636 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1637 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1638 LiveUnits.addReg(UnusedScratchReg);
1639 } else {
1640 // Needs spill.
1641 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1642 "Re-reserving spill slot for EXEC copy register");
1643 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1644 /*IncludeScratchCopy=*/false);
1645 }
1646 } else if (ReservedRegForExecCopy) {
1647 // Reset it at this point. There are no whole-wave copies and spills
1648 // encountered.
1649 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1650 }
1651
1652 // Chain functions don't return to the caller, so they don't need to preserve
1653 // the FP and BP.
1654 if (MFI->isChainFunction())
1655 return;
1656
1657 // hasFP only knows about stack objects that already exist. We're now
1658 // determining the stack slots that will be created, so we have to predict
1659 // them. Stack objects force FP usage with calls.
1660 //
1661 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1662 // don't want to report it here.
1663 //
1664 // FIXME: Is this really hasReservedCallFrame?
1665 const bool WillHaveFP =
1666 FrameInfo.hasCalls() &&
1667 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1668
// Reserve a save location (VGPR lane or temp SGPR) for FP and, if present,
// the base pointer.
1669 if (WillHaveFP || hasFP(MF)) {
1670 Register FramePtrReg = MFI->getFrameOffsetReg();
1671 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1672 "Re-reserving spill slot for FP");
1673 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1674 }
1675
1676 if (TRI->hasBasePointer(MF)) {
1677 Register BasePtrReg = TRI->getBaseRegister();
1678 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1679 "Re-reserving spill slot for BP");
1680 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1681 }
1682}
1683
1684// Only report VGPRs to generic code.
// NOTE(review): the signature line (source line 1685, with the function name)
// is missing from this extract; SavedVGPRs is the callee-save BitVector that
// generic PEI code consumes.
1686 BitVector &SavedVGPRs,
1687 RegScavenger *RS) const {
1689
1690 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1691 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1692 // we don't need to save and restore anything.
1693 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1694 return;
1695
1697
1698 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1699 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1700 const SIInstrInfo *TII = ST.getInstrInfo();
1701 bool NeedExecCopyReservedReg = false;
1702
// Scan for WWM spill opcodes (which need an EXEC-copy register) and find the
// function's return instruction (used below to exclude return-value VGPRs).
1703 MachineInstr *ReturnMI = nullptr;
1704 for (MachineBasicBlock &MBB : MF) {
1705 for (MachineInstr &MI : MBB) {
1706 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1707 // handle them elsewhere.
1708 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1709 NeedExecCopyReservedReg = true;
1710 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1711 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1712 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1713 (MFI->isChainFunction() &&
1714 TII->isChainCallOpcode(MI.getOpcode()))) {
1715 // We expect all return to be the same size.
1716 assert(!ReturnMI ||
1717 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1718 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1719 ReturnMI = &MI;
1720 }
1721 }
1722 }
1723
1724 SmallVector<Register> SortedWWMVGPRs;
1725 for (Register Reg : MFI->getWWMReservedRegs()) {
1726 // The shift-back is needed only for the VGPRs used for SGPR spills and they
1727 // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
1728 // reserved registers.
1729 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1730 if (TRI->getRegSizeInBits(*RC) != 32)
1731 continue;
1732 SortedWWMVGPRs.push_back(Reg);
1733 }
1734
// Descending order so the shift-down packs them into the lowest range.
1735 sort(SortedWWMVGPRs, std::greater<Register>());
1736 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1737
1738 if (MFI->isEntryFunction())
1739 return;
1740
1741 if (MFI->isWholeWaveFunction()) {
1742 // In practice, all the VGPRs are WWM registers, and we will need to save at
1743 // least their inactive lanes. Add them to WWMReservedRegs.
1744 assert(!NeedExecCopyReservedReg &&
1745 "Whole wave functions can use the reg mapped for their i1 argument");
1746
// Reserve every modified addressable arch VGPR as WWM and make it live-in
// to the entry block (its inactive lanes hold caller state).
1747 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1748 for (MCRegister Reg :
1749 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1750 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1751 MFI->reserveWWMRegister(Reg);
1752 MF.begin()->addLiveIn(Reg);
1753 }
1754 MF.begin()->sortUniqueLiveIns();
1755 }
1756
1757 // Remove any VGPRs used in the return value because these do not need to be saved.
1758 // This prevents CSR restore from clobbering return VGPRs.
1759 if (ReturnMI) {
1760 for (auto &Op : ReturnMI->operands()) {
1761 if (Op.isReg())
1762 SavedVGPRs.reset(Op.getReg());
1763 }
1764 }
1765
1766 // Create the stack objects for WWM registers now.
1767 for (Register Reg : MFI->getWWMReservedRegs()) {
1768 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1769 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1770 TRI->getSpillAlign(*RC));
1771 }
1772
1773 // Ignore the SGPRs the default implementation found.
1774 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1775
1776 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1777 // In gfx908 there was do AGPR loads and stores and thus spilling also
1778 // require a temporary VGPR.
1779 if (!ST.hasGFX90AInsts())
1780 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1781
1782 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1783
1784 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1785 // allow the default insertion to handle them.
1786 for (auto &Reg : MFI->getWWMSpills())
1787 SavedVGPRs.reset(Reg.first);
1788}
1789
// NOTE(review): the signature line (source line 1790, with the function name)
// is missing from this extract. SGPR counterpart of determineCalleeSaves:
// prunes specially-managed registers (SP, FP, vector regs) from the generic
// callee-save set and forces the return-address pair to be saved when needed.
1791 BitVector &SavedRegs,
1792 RegScavenger *RS) const {
1795 if (MFI->isEntryFunction())
1796 return;
1797
1798 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1799 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1800
1801 // The SP is specifically managed and we don't want extra spills of it.
1802 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1803
// Snapshot before dropping vector registers: the FP decision below depends
// on whether ANY register (vector included) was marked for saving.
1804 const BitVector AllSavedRegs = SavedRegs;
1805 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1806
1807 // We have to anticipate introducing CSR VGPR spills or spill of caller
1808 // save VGPR reserved for SGPR spills as we now always create stack entry
1809 // for it, if we don't have any stack objects already, since we require a FP
1810 // if there is a call and stack. We will allocate a VGPR for SGPR spills if
1811 // there are any SGPR spills. Whether they are CSR spills or otherwise.
1812 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1813 const bool WillHaveFP =
1814 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1815
1816 // FP will be specially managed like SP.
1817 if (WillHaveFP || hasFP(MF))
1818 SavedRegs.reset(MFI->getFrameOffsetReg());
1819
1820 // Return address use with return instruction is hidden through the SI_RETURN
1821 // pseudo. Given that and since the IPRA computes actual register usage and
1822 // does not use CSR list, the clobbering of return address by function calls
1823 // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1824 // usage collection. This will ensure save/restore of return address happens
1825 // in those scenarios.
1826 const MachineRegisterInfo &MRI = MF.getRegInfo();
1827 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1828 if (!MFI->isEntryFunction() &&
1829 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1830 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1831 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1832 }
1833}
1834
1836 const GCNSubtarget &ST,
1837 std::vector<CalleeSavedInfo> &CSI) {
1839 MachineFrameInfo &MFI = MF.getFrameInfo();
1840 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1841
1842 assert(
1843 llvm::is_sorted(CSI,
1844 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1845 return A.getReg() < B.getReg();
1846 }) &&
1847 "Callee saved registers not sorted");
1848
1849 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1850 return !CSI.isSpilledToReg() &&
1851 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1852 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1853 };
1854
1855 auto CSEnd = CSI.end();
1856 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1857 Register Reg = CSIt->getReg();
1858 if (!CanUseBlockOps(*CSIt))
1859 continue;
1860
1861 // Find all the regs that will fit in a 32-bit mask starting at the current
1862 // reg and build said mask. It should have 1 for every register that's
1863 // included, with the current register as the least significant bit.
1864 uint32_t Mask = 1;
1865 CSEnd = std::remove_if(
1866 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1867 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1868 Mask |= 1 << (CSI.getReg() - Reg);
1869 return true;
1870 } else {
1871 return false;
1872 }
1873 });
1874
1875 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1876 Register RegBlock =
1877 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1878 if (!RegBlock) {
1879 // We couldn't find a super register for the block. This can happen if
1880 // the register we started with is too high (e.g. v232 if the maximum is
1881 // v255). We therefore try to get the last register block and figure out
1882 // the mask from there.
1883 Register LastBlockStart =
1884 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1885 RegBlock =
1886 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1887 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1888 "Couldn't find super register");
1889 int RegDelta = Reg - LastBlockStart;
1890 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1891 "Bad shift amount");
1892 Mask <<= RegDelta;
1893 }
1894
1895 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1896
1897 // The stack objects can be a bit smaller than the register block if we know
1898 // some of the high bits of Mask are 0. This may happen often with calling
1899 // conventions where the caller and callee-saved VGPRs are interleaved at
1900 // a small boundary (e.g. 8 or 16).
1901 int UnusedBits = llvm::countl_zero(Mask);
1902 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1903 int FrameIdx =
1904 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1905 /*isSpillSlot=*/true);
1906 MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
1907
1908 CSIt->setFrameIdx(FrameIdx);
1909 CSIt->setReg(RegBlock);
1910 }
1911 CSI.erase(CSEnd, CSI.end());
1912}
1913
1916 std::vector<CalleeSavedInfo> &CSI) const {
1917 if (CSI.empty())
1918 return true; // Early exit if no callee saved registers are modified!
1919
1920 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1921 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1922
1923 if (UseVGPRBlocks)
1924 assignSlotsUsingVGPRBlocks(MF, ST, CSI);
1925
1926 return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
1927}
1928
1931 std::vector<CalleeSavedInfo> &CSI) const {
1932 if (CSI.empty())
1933 return true; // Early exit if no callee saved registers are modified!
1934
1935 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1936 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1937 const SIRegisterInfo *RI = ST.getRegisterInfo();
1938 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1939 Register BasePtrReg = RI->getBaseRegister();
1940 Register SGPRForFPSaveRestoreCopy =
1941 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1942 Register SGPRForBPSaveRestoreCopy =
1943 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1944 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1945 return false;
1946
1947 unsigned NumModifiedRegs = 0;
1948
1949 if (SGPRForFPSaveRestoreCopy)
1950 NumModifiedRegs++;
1951 if (SGPRForBPSaveRestoreCopy)
1952 NumModifiedRegs++;
1953
1954 for (auto &CS : CSI) {
1955 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1956 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1957 if (--NumModifiedRegs)
1958 break;
1959 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1960 SGPRForBPSaveRestoreCopy) {
1961 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1962 if (--NumModifiedRegs)
1963 break;
1964 }
1965 }
1966
1967 return false;
1968}
1969
1971 const MachineFunction &MF) const {
1972
1973 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1974 const MachineFrameInfo &MFI = MF.getFrameInfo();
1975 const SIInstrInfo *TII = ST.getInstrInfo();
1976 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1977 uint64_t MaxOffset = EstStackSize - 1;
1978
1979 // We need the emergency stack slots to be allocated in range of the
1980 // MUBUF/flat scratch immediate offset from the base register, so assign these
1981 // first at the incoming SP position.
1982 //
1983 // TODO: We could try sorting the objects to find a hole in the first bytes
1984 // rather than allocating as close to possible. This could save a lot of space
1985 // on frames with alignment requirements.
1986 if (ST.hasFlatScratchEnabled()) {
1987 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1989 return false;
1990 } else {
1991 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1992 return false;
1993 }
1994
1995 return true;
1996}
1997
2001 MachineFunction *MF = MBB.getParent();
2002 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2003 if (!ST.useVGPRBlockOpsForCSR())
2004 return false;
2005
2006 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2008 const SIInstrInfo *TII = ST.getInstrInfo();
2010
2011 const TargetRegisterClass *BlockRegClass =
2012 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2013 for (const CalleeSavedInfo &CS : CSI) {
2014 Register Reg = CS.getReg();
2015 if (!BlockRegClass->contains(Reg) ||
2016 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2018 continue;
2019 }
2020
2021 // Build a scratch block store.
2022 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2023 int FrameIndex = CS.getFrameIdx();
2024 MachinePointerInfo PtrInfo =
2025 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2026 MachineMemOperand *MMO =
2028 FrameInfo.getObjectSize(FrameIndex),
2029 FrameInfo.getObjectAlign(FrameIndex));
2030
2031 BuildMI(MBB, MI, MI->getDebugLoc(),
2032 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2033 .addReg(Reg, getKillRegState(false))
2034 .addFrameIndex(FrameIndex)
2036 .addImm(0)
2037 .addImm(Mask)
2038 .addMemOperand(MMO);
2039
2040 FuncInfo->setHasSpilledVGPRs();
2041
2042 // Add the register to the liveins. This is necessary because if any of the
2043 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2044 // then the whole block will be marked as reserved and `updateLiveness` will
2045 // skip it.
2046 MBB.addLiveIn(Reg);
2047 }
2048 MBB.sortUniqueLiveIns();
2049
2050 return true;
2051}
2052
2056 MachineFunction *MF = MBB.getParent();
2057 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2058 if (!ST.useVGPRBlockOpsForCSR())
2059 return false;
2060
2062 MachineFrameInfo &MFI = MF->getFrameInfo();
2063 const SIInstrInfo *TII = ST.getInstrInfo();
2064 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2065 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2066 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2067 Register Reg = CS.getReg();
2068 if (!BlockRegClass->contains(Reg) ||
2069 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2071 continue;
2072 }
2073
2074 // Build a scratch block load.
2075 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2076 int FrameIndex = CS.getFrameIdx();
2077 MachinePointerInfo PtrInfo =
2078 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2080 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2081 MFI.getObjectAlign(FrameIndex));
2082
2083 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2084 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2085 .addFrameIndex(FrameIndex)
2086 .addReg(FuncInfo->getStackPtrOffsetReg())
2087 .addImm(0)
2088 .addImm(Mask)
2089 .addMemOperand(MMO);
2090 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2091
2092 // Add the register to the liveins. This is necessary because if any of the
2093 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2094 // then the whole block will be marked as reserved and `updateLiveness` will
2095 // skip it.
2096 MBB.addLiveIn(Reg);
2097 }
2098
2099 MBB.sortUniqueLiveIns();
2100 return true;
2101}
2102
2104 MachineFunction &MF,
2107 int64_t Amount = I->getOperand(0).getImm();
2108 if (Amount == 0)
2109 return MBB.erase(I);
2110
2111 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2112 const SIInstrInfo *TII = ST.getInstrInfo();
2113 const DebugLoc &DL = I->getDebugLoc();
2114 unsigned Opc = I->getOpcode();
2115 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2116 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2117
2118 if (!hasReservedCallFrame(MF)) {
2119 Amount = alignTo(Amount, getStackAlign());
2120 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2123
2124 Amount *= getScratchScaleFactor(ST);
2125 if (IsDestroy)
2126 Amount = -Amount;
2127 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2128 .addReg(SPReg)
2129 .addImm(Amount);
2130 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2131 } else if (CalleePopAmount != 0) {
2132 llvm_unreachable("is this used?");
2133 }
2134
2135 return MBB.erase(I);
2136}
2137
2138/// Returns true if the frame will require a reference to the stack pointer.
2139///
2140/// This is the set of conditions common to setting up the stack pointer in a
2141/// kernel, and for using a frame pointer in a callable function.
2142///
2143/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2144/// references SP.
2146 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2147}
2148
2149// The FP for kernels is always known 0, so we never really need to setup an
2150// explicit register for it. However, DisableFramePointerElim will force us to
2151// use a register for it.
2153 const MachineFrameInfo &MFI = MF.getFrameInfo();
2154
2155 // For entry functions we can use an immediate offset in most cases,
2156 // so the presence of calls doesn't imply we need a distinct frame pointer.
2157 if (MFI.hasCalls() &&
2159 // All offsets are unsigned, so need to be addressed in the same direction
2160 // as stack growth.
2161
2162 // FIXME: This function is pretty broken, since it can be called before the
2163 // frame layout is determined or CSR spills are inserted.
2164 return MFI.getStackSize() != 0;
2165 }
2166
2167 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2168 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2169 MF) ||
2172}
2173
2175 const MachineFunction &MF) const {
2176 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2179}
2180
2181// This is essentially a reduced version of hasFP for entry functions. Since the
2182// stack pointer is known 0 on entry to kernels, we never really need an FP
2183// register. We may need to initialize the stack pointer depending on the frame
2184// properties, which logically overlaps many of the cases where an ordinary
2185// function would require an FP.
2187 const MachineFunction &MF) const {
2188 // Callable functions always require a stack pointer reference.
2190 "only expected to call this for entry points functions");
2191
2192 const MachineFrameInfo &MFI = MF.getFrameInfo();
2193
2194 // Entry points ordinarily don't need to initialize SP. We have to set it up
2195 // for callees if there are any. Also note tail calls are only possible via
2196 // the `llvm.amdgcn.cs.chain` intrinsic.
2197 if (MFI.hasCalls() || MFI.hasTailCall())
2198 return true;
2199
2200 // We still need to initialize the SP if we're doing anything weird that
2201 // references the SP, like variable sized stack objects.
2202 return frameTriviallyRequiresSP(MFI);
2203}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const int BlockSize
Definition TarWriter.cpp:33
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition BitVector.h:744
BitVector & set()
Definition BitVector.h:370
bool any() const
any - Returns true if any bit is set.
Definition BitVector.h:189
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition BitVector.h:732
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition BitVector.h:175
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
mop_range operands()
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
void setVGPRToAGPRSpillDead(int FrameIndex)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetOptions Options
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.