LLVM 23.0.0git
ScheduleDAGInstrs.cpp
Go to the documentation of this file.
1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/SparseSet.h"
41#include "llvm/Config/llvm-config.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/Type.h"
45#include "llvm/IR/Value.h"
46#include "llvm/MC/LaneBitmask.h"
51#include "llvm/Support/Debug.h"
53#include "llvm/Support/Format.h"
55#include <algorithm>
56#include <cassert>
57#include <iterator>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "machine-scheduler"
64
65static cl::opt<bool>
66 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
67 cl::desc("Enable use of AA during MI DAG construction"));
68
69static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
70 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
71
72static cl::opt<bool>
73 EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
74 cl::desc("Use TargetSchedModel for latency lookup"));
75
76static cl::opt<bool>
77 EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
78 cl::desc("Use InstrItineraryData for latency lookup"));
79
80// Note: the two options below might be used in tuning compile time vs
81// output quality. Setting HugeRegion so large that it will never be
82// reached means best-effort, but may be slow.
83
84// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
85// together hold this many SUs, a reduction of maps will be done.
87 HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(500),
88 cl::desc("The limit to use while constructing the DAG "
89 "prior to scheduling, at which point a trade-off "
90 "is made to avoid excessive compile time."));
91
92#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
94 "sched-print-cycles", cl::Hidden, cl::init(false),
95 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
96#endif
97
99#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
100 dbgs() << "{ ";
101 for (const SUnit *SU : L) {
102 dbgs() << "SU(" << SU->NodeNum << ")";
103 if (SU != L.back())
104 dbgs() << ", ";
105 }
106 dbgs() << "}\n";
107#endif
108}
109
111 const MachineLoopInfo *mli,
112 bool RemoveKillFlags)
113 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
116 Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
117 DbgValues.clear();
118
119 const TargetSubtargetInfo &ST = mf.getSubtarget();
121}
122
123/// If this machine instr has memory reference information and it can be
124/// tracked to a normal reference to a known object, return the Value
125/// for that object. This function returns false the memory location is
126/// unknown or may alias anything.
128 const MachineFrameInfo &MFI,
130 const DataLayout &DL) {
131 auto AllMMOsOkay = [&]() {
132 for (const MachineMemOperand *MMO : MI->memoperands()) {
133 // TODO: Figure out whether isAtomic is really necessary (see D57601).
134 if (MMO->isVolatile() || MMO->isAtomic())
135 return false;
136
137 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
138 // Function that contain tail calls don't have unique PseudoSourceValue
139 // objects. Two PseudoSourceValues might refer to the same or
140 // overlapping locations. The client code calling this function assumes
141 // this is not the case. So return a conservative answer of no known
142 // object.
143 if (MFI.hasTailCall())
144 return false;
145
146 // For now, ignore PseudoSourceValues which may alias LLVM IR values
147 // because the code that uses this function has no way to cope with
148 // such aliases.
149 if (PSV->isAliased(&MFI))
150 return false;
151
152 bool MayAlias = PSV->mayAlias(&MFI);
153 Objects.emplace_back(PSV, MayAlias);
154 } else if (const Value *V = MMO->getValue()) {
156 if (!getUnderlyingObjectsForCodeGen(V, Objs))
157 return false;
158
159 for (Value *V : Objs) {
161 Objects.emplace_back(V, true);
162 }
163 } else
164 return false;
165 }
166 return true;
167 };
168
169 if (!AllMMOsOkay()) {
170 Objects.clear();
171 return false;
172 }
173
174 return true;
175}
176
180
182 // Subclasses should no longer refer to the old block.
183 BB = nullptr;
184}
185
189 unsigned regioninstrs) {
190 assert(bb == BB && "startBlock should set BB");
192 RegionEnd = end;
193 NumRegionInstrs = regioninstrs;
194}
195
197 // Nothing to do.
198}
199
201 MachineInstr *ExitMI =
202 RegionEnd != BB->end()
204 : nullptr;
205 ExitSU.setInstr(ExitMI);
206 // Add dependencies on the defs and uses of the instruction.
207 if (ExitMI) {
208 const MCInstrDesc &MIDesc = ExitMI->getDesc();
209 for (const MachineOperand &MO : ExitMI->all_uses()) {
210 unsigned OpIdx = MO.getOperandNo();
211 Register Reg = MO.getReg();
212 if (Reg.isPhysical()) {
213 // addPhysRegDataDeps uses the provided operand index to retrieve
214 // the operand use cycle from the scheduling model. If the operand
215 // is "fake" (e.g., an operand of a call instruction used to pass
216 // an argument to the called function.), the scheduling model may not
217 // have an entry for it. If this is the case, pass -1 as operand index,
218 // which will cause addPhysRegDataDeps to add an artificial dependency.
219 // FIXME: Using hasImplicitUseOfPhysReg here is inaccurate as it misses
220 // aliases. When fixing, make sure to update addPhysRegDataDeps, too.
221 bool IsRealUse = OpIdx < MIDesc.getNumOperands() ||
222 MIDesc.hasImplicitUseOfPhysReg(Reg);
223 for (MCRegUnit Unit : TRI->regunits(Reg))
224 Uses.insert(PhysRegSUOper(&ExitSU, IsRealUse ? OpIdx : -1, Unit));
225 } else if (Reg.isVirtual() && MO.readsReg()) {
227 }
228 }
229 }
230 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
231 // For others, e.g. fallthrough, conditional branch, assume the exit
232 // uses all the registers that are livein to the successor blocks.
233 for (const MachineBasicBlock *Succ : BB->successors()) {
234 for (const auto &LI : Succ->liveins()) {
235 for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
236 auto [Unit, Mask] = *U;
237 if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
238 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
239 }
240 }
241 }
242 }
243}
244
245/// MO is an operand of SU's instruction that defines a physical register. Adds
246/// data dependencies from SU to any uses of the physical register.
247void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
248 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
249 assert(MO.isDef() && "expect physreg def");
250 Register Reg = MO.getReg();
251
252 // Ask the target if address-backscheduling is desirable, and if so how much.
253 const TargetSubtargetInfo &ST = MF.getSubtarget();
254
255 // Only use any non-zero latency for real defs/uses, in contrast to
256 // "fake" operands added by regalloc.
257 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
258 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
259 !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
260 for (MCRegUnit Unit : TRI->regunits(Reg)) {
261 for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
262 ++I) {
263 SUnit *UseSU = I->SU;
264 if (UseSU == SU)
265 continue;
266
267 // Adjust the dependence latency using operand def/use information,
268 // then allow the target to perform its own adjustments.
269 MachineInstr *UseInstr = nullptr;
270 int UseOpIdx = I->OpIdx;
271 bool ImplicitPseudoUse = false;
272 SDep Dep;
273 if (UseOpIdx < 0) {
274 Dep = SDep(SU, SDep::Artificial);
275 } else {
276 // Set the hasPhysRegDefs only for physreg defs that have a use within
277 // the scheduling region.
278 SU->hasPhysRegDefs = true;
279
280 UseInstr = UseSU->getInstr();
281 Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
282 const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
283 ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
285
286 Dep = SDep(SU, SDep::Data, UseReg);
287 }
288 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
289 Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
290 UseInstr, UseOpIdx));
291 } else {
292 Dep.setLatency(0);
293 }
294 ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel);
295 UseSU->addPred(Dep);
296 }
297 }
298}
299
300/// Adds register dependencies (data, anti, and output) from this SUnit
301/// to following instructions in the same scheduling region that depend the
302/// physical register referenced at OperIdx.
303void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
304 MachineInstr *MI = SU->getInstr();
305 MachineOperand &MO = MI->getOperand(OperIdx);
306 Register Reg = MO.getReg();
307 // We do not need to track any dependencies for constant registers.
308 if (MRI.isConstantPhysReg(Reg))
309 return;
310
311 const TargetSubtargetInfo &ST = MF.getSubtarget();
312
313 // Optionally add output and anti dependencies. For anti
314 // dependencies we use a latency of 0 because for a multi-issue
315 // target we want to allow the defining instruction to issue
316 // in the same cycle as the using instruction.
317 // TODO: Using a latency of 1 here for output dependencies assumes
318 // there's no cost for reusing registers.
319 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
320 for (MCRegUnit Unit : TRI->regunits(Reg)) {
321 for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
322 ++I) {
323 SUnit *DefSU = I->SU;
324 if (DefSU == &ExitSU)
325 continue;
326 MachineInstr *DefInstr = DefSU->getInstr();
327 MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
328 if (DefSU != SU &&
329 (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
330 SDep Dep(SU, Kind, DefMO.getReg());
331 if (Kind != SDep::Anti) {
332 Dep.setLatency(
333 SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
334 }
335 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep,
336 &SchedModel);
337 DefSU->addPred(Dep);
338 }
339 }
340 }
341
342 if (MO.isUse()) {
343 SU->hasPhysRegUses = true;
344 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
345 // retrieve the existing SUnits list for this register's uses.
346 // Push this SUnit on the use list.
347 for (MCRegUnit Unit : TRI->regunits(Reg))
348 Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
349 if (RemoveKillFlags)
350 MO.setIsKill(false);
351 } else {
352 addPhysRegDataDeps(SU, OperIdx);
353
354 // Clear previous uses and defs of this register and its subregisters.
355 for (MCRegUnit Unit : TRI->regunits(Reg)) {
356 Uses.eraseAll(Unit);
357 if (!MO.isDead())
358 Defs.eraseAll(Unit);
359 }
360
361 if (MO.isDead() && SU->isCall) {
362 // Calls will not be reordered because of chain dependencies (see
363 // below). Since call operands are dead, calls may continue to be added
364 // to the DefList making dependence checking quadratic in the size of
365 // the block. Instead, we leave only one call at the back of the
366 // DefList.
367 for (MCRegUnit Unit : TRI->regunits(Reg)) {
368 RegUnit2SUnitsMap::RangePair P = Defs.equal_range(Unit);
371 for (bool isBegin = I == B; !isBegin; /* empty */) {
372 isBegin = (--I) == B;
373 if (!I->SU->isCall)
374 break;
375 I = Defs.erase(I);
376 }
377 }
378 }
379
380 // Defs are pushed in the order they are visited and never reordered.
381 for (MCRegUnit Unit : TRI->regunits(Reg))
382 Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
383 }
384}
385
387{
388 Register Reg = MO.getReg();
389 // No point in tracking lanemasks if we don't have interesting subregisters.
390 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
391 if (!RC.HasDisjunctSubRegs)
392 return LaneBitmask::getAll();
393
394 unsigned SubReg = MO.getSubReg();
395 if (SubReg == 0)
396 return RC.getLaneMask();
397 return TRI->getSubRegIndexLaneMask(SubReg);
398}
399
401 auto RegUse = CurrentVRegUses.find(MO.getReg());
402 if (RegUse == CurrentVRegUses.end())
403 return true;
404 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
405}
406
407/// Adds register output and data dependencies from this SUnit to instructions
408/// that occur later in the same scheduling region if they read from or write to
409/// the virtual register defined at OperIdx.
410///
411/// TODO: Hoist loop induction variable increments. This has to be
412/// reevaluated. Generally, IV scheduling should be done before coalescing.
413void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
414 MachineInstr *MI = SU->getInstr();
415 MachineOperand &MO = MI->getOperand(OperIdx);
416 Register Reg = MO.getReg();
417
418 LaneBitmask DefLaneMask;
419 LaneBitmask KillLaneMask;
420 if (TrackLaneMasks) {
421 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
422 DefLaneMask = getLaneMaskForMO(MO);
423 // If we have a <read-undef> flag, none of the lane values comes from an
424 // earlier instruction.
425 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
426
427 if (MO.getSubReg() != 0 && MO.isUndef()) {
428 // There may be other subregister defs on the same instruction of the same
429 // register in later operands. The lanes of other defs will now be live
430 // after this instruction, so these should not be treated as killed by the
431 // instruction even though they appear to be killed in this one operand.
432 for (const MachineOperand &OtherMO :
433 llvm::drop_begin(MI->operands(), OperIdx + 1))
434 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
435 KillLaneMask &= ~getLaneMaskForMO(OtherMO);
436 }
437
438 // Clear undef flag, we'll re-add it later once we know which subregister
439 // Def is first.
440 MO.setIsUndef(false);
441 } else {
442 DefLaneMask = LaneBitmask::getAll();
443 KillLaneMask = LaneBitmask::getAll();
444 }
445
446 if (MO.isDead()) {
447 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
448 } else {
449 // Add data dependence to all uses we found so far.
450 const TargetSubtargetInfo &ST = MF.getSubtarget();
452 E = CurrentVRegUses.end(); I != E; /*empty*/) {
453 LaneBitmask LaneMask = I->LaneMask;
454 // Ignore uses of other lanes.
455 if ((LaneMask & KillLaneMask).none()) {
456 ++I;
457 continue;
458 }
459
460 if ((LaneMask & DefLaneMask).any()) {
461 SUnit *UseSU = I->SU;
462 MachineInstr *Use = UseSU->getInstr();
463 SDep Dep(SU, SDep::Data, Reg);
464 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
465 I->OperandIndex));
466 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep,
467 &SchedModel);
468 UseSU->addPred(Dep);
469 }
470
471 LaneMask &= ~KillLaneMask;
472 // If we found a Def for all lanes of this use, remove it from the list.
473 if (LaneMask.any()) {
474 I->LaneMask = LaneMask;
475 ++I;
476 } else
477 I = CurrentVRegUses.erase(I);
478 }
479 }
480
481 // Shortcut: Singly defined vregs do not have output/anti dependencies.
482 if (MRI.hasOneDef(Reg))
483 return;
484
485 // Add output dependence to the next nearest defs of this vreg.
486 //
487 // Unless this definition is dead, the output dependence should be
488 // transitively redundant with antidependencies from this definition's
489 // uses. We're conservative for now until we have a way to guarantee the uses
490 // are not eliminated sometime during scheduling. The output dependence edge
491 // is also useful if output latency exceeds def-use latency.
492 LaneBitmask LaneMask = DefLaneMask;
493 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
494 CurrentVRegDefs.end())) {
495 // Ignore defs for other lanes.
496 if ((V2SU.LaneMask & LaneMask).none())
497 continue;
498 // Add an output dependence.
499 SUnit *DefSU = V2SU.SU;
500 // Ignore additional defs of the same lanes in one instruction. This can
501 // happen because lanemasks are shared for targets with too many
502 // subregisters. We also use some representration tricks/hacks where we
503 // add super-register defs/uses, to imply that although we only access parts
504 // of the reg we care about the full one.
505 if (DefSU == SU)
506 continue;
507 SDep Dep(SU, SDep::Output, Reg);
508 Dep.setLatency(
509 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
510 DefSU->addPred(Dep);
511
512 // Update current definition. This can get tricky if the def was about a
513 // bigger lanemask before. We then have to shrink it and create a new
514 // VReg2SUnit for the non-overlapping part.
515 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
516 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
517 V2SU.SU = SU;
518 V2SU.LaneMask = OverlapMask;
519 if (NonOverlapMask.any())
520 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
521 }
522 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
523 if (LaneMask.any())
524 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
525}
526
527/// Adds a register data dependency if the instruction that defines the
528/// virtual register used at OperIdx is mapped to an SUnit. Add a register
529/// antidependency from this SUnit to instructions that occur later in the same
530/// scheduling region if they write the virtual register.
531///
532/// TODO: Handle ExitSU "uses" properly.
533void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
534 const MachineInstr *MI = SU->getInstr();
535 assert(!MI->isDebugOrPseudoInstr());
536
537 const MachineOperand &MO = MI->getOperand(OperIdx);
538 Register Reg = MO.getReg();
539
540 // Remember the use. Data dependencies will be added when we find the def.
543 CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
544
545 // Add antidependences to the following defs of the vreg.
546 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
547 CurrentVRegDefs.end())) {
548 // Ignore defs for unrelated lanes.
549 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
550 if ((PrevDefLaneMask & LaneMask).none())
551 continue;
552 if (V2SU.SU == SU)
553 continue;
554
555 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
556 }
557}
558
559
561 unsigned Latency) {
562 if (SUa->getInstr()->mayAlias(getAAForDep(), *SUb->getInstr(), UseTBAA)) {
563 SDep Dep(SUa, SDep::MayAliasMem);
564 Dep.setLatency(Latency);
565 SUb->addPred(Dep);
566 }
567}
568
569/// Creates an SUnit for each real instruction, numbered in top-down
570/// topological order. The instruction order A < B, implies that no edge exists
571/// from B to A.
572///
573/// Map each real instruction to its SUnit.
574///
575/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
576/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
577/// instead of pointers.
578///
579/// MachineScheduler relies on initSUnits numbering the nodes by their order in
580/// the original instruction list.
582 // We'll be allocating one SUnit for each real instruction in the region,
583 // which is contained within a basic block.
584 SUnits.reserve(NumRegionInstrs);
585
587 if (MI.isDebugOrPseudoInstr())
588 continue;
589
590 SUnit *SU = newSUnit(&MI);
591 MISUnitMap[&MI] = SU;
592
593 SU->isCall = MI.isCall();
594 SU->isCommutable = MI.isCommutable();
595
596 // Assign the Latency field of SU using target-provided information.
597 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
598
599 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
600 //
601 // Reserved resources block an instruction from issuing and stall the
602 // entire pipeline. These are identified by BufferSize=0.
603 //
604 // Unbuffered resources prevent execution of subsequent instructions that
605 // require the same resources. This is used for in-order execution pipelines
606 // within an out-of-order core. These are identified by BufferSize=1.
607 if (SchedModel.hasInstrSchedModel()) {
608 const MCSchedClassDesc *SC = getSchedClass(SU);
609 for (const MCWriteProcResEntry &PRE :
610 make_range(SchedModel.getWriteProcResBegin(SC),
611 SchedModel.getWriteProcResEnd(SC))) {
612 switch (SchedModel.getResourceBufferSize(PRE.ProcResourceIdx)) {
613 case 0:
614 SU->hasReservedResource = true;
615 break;
616 case 1:
617 SU->isUnbuffered = true;
618 break;
619 default:
620 break;
621 }
622 }
623 }
624 }
625}
626
628 : public SmallMapVector<ValueType, SUList, 4> {
629 /// Current total number of SUs in map.
630 unsigned NumNodes = 0;
631
632 /// 1 for loads, 0 for stores. (see comment in SUList)
633 unsigned TrueMemOrderLatency;
634
635public:
636 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
637
638 /// To keep NumNodes up to date, insert() is used instead of
639 /// this operator w/ push_back().
641 llvm_unreachable("Don't use. Use insert() instead."); };
642
643 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
644 /// reduce().
645 void inline insert(SUnit *SU, ValueType V) {
646 MapVector::operator[](V).push_back(SU);
647 NumNodes++;
648 }
649
650 /// Clears the list of SUs mapped to V.
651 void inline clearList(ValueType V) {
652 iterator Itr = find(V);
653 if (Itr != end()) {
654 assert(NumNodes >= Itr->second.size());
655 NumNodes -= Itr->second.size();
656
657 Itr->second.clear();
658 }
659 }
660
661 /// Clears map from all contents.
662 void clear() {
664 NumNodes = 0;
665 }
666
667 unsigned inline size() const { return NumNodes; }
668
669 /// Counts the number of SUs in this map after a reduction.
671 NumNodes = 0;
672 for (auto &I : *this)
673 NumNodes += I.second.size();
674 }
675
676 unsigned inline getTrueMemOrderLatency() const {
677 return TrueMemOrderLatency;
678 }
679
680 void dump();
681};
682
684 Value2SUsMap &Val2SUsMap) {
685 for (auto &I : Val2SUsMap)
686 addChainDependencies(SU, I.second,
687 Val2SUsMap.getTrueMemOrderLatency());
688}
689
691 Value2SUsMap &Val2SUsMap,
692 ValueType V) {
693 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
694 if (Itr != Val2SUsMap.end())
695 addChainDependencies(SU, Itr->second,
696 Val2SUsMap.getTrueMemOrderLatency());
697}
698
700 assert(BarrierChain != nullptr);
701
702 for (auto &[V, SUs] : map) {
703 (void)V;
704 for (auto *SU : SUs)
705 SU->addPredBarrier(BarrierChain);
706 }
707 map.clear();
708}
709
711 RegPressureTracker *RPTracker,
712 PressureDiffs *PDiffs,
713 LiveIntervals *LIS,
714 bool TrackLaneMasks) {
715 const TargetSubtargetInfo &ST = MF.getSubtarget();
716 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
717 : ST.useAA();
718 if (UseAA && AA)
719 AAForDep.emplace(*AA);
720
721 BarrierChain = nullptr;
722 MemOpsProcessed = 0;
723
724 this->TrackLaneMasks = TrackLaneMasks;
725 MISUnitMap.clear();
727
728 // Create an SUnit for each real instruction.
729 initSUnits();
730
731 if (PDiffs)
732 PDiffs->init(SUnits.size());
733
734 // We build scheduling units by walking a block's instruction list
735 // from bottom to top.
736
737 // Each MIs' memory operand(s) is analyzed to a list of underlying
738 // objects. The SU is then inserted in the SUList(s) mapped from the
739 // Value(s). Each Value thus gets mapped to lists of SUs depending
740 // on it, stores and loads kept separately. Two SUs are trivially
741 // non-aliasing if they both depend on only identified Values and do
742 // not share any common Value.
743 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
744
745 // Certain memory accesses are known to not alias any SU in Stores
746 // or Loads, and have therefore their own 'NonAlias'
747 // domain. E.g. spill / reload instructions never alias LLVM I/R
748 // Values. It would be nice to assume that this type of memory
749 // accesses always have a proper memory operand modelling, and are
750 // therefore never unanalyzable, but this is conservatively not
751 // done.
752 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
753
754 // Track all instructions that may raise floating-point exceptions.
755 // These do not depend on one other (or normal loads or stores), but
756 // must not be rescheduled across global barriers. Note that we don't
757 // really need a "map" here since we don't track those MIs by value;
758 // using the same Value2SUsMap data type here is simply a matter of
759 // convenience.
760 Value2SUsMap FPExceptions;
761
762 // Remove any stale debug info; sometimes BuildSchedGraph is called again
763 // without emitting the info from the previous call.
764 DbgValues.clear();
765 FirstDbgValue = nullptr;
766
767 assert(Defs.empty() && Uses.empty() &&
768 "Only BuildGraph should update Defs/Uses");
769 Defs.setUniverse(TRI->getNumRegs());
770 Uses.setUniverse(TRI->getNumRegs());
771
772 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
773 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
774 unsigned NumVirtRegs = MRI.getNumVirtRegs();
775 CurrentVRegDefs.setUniverse(NumVirtRegs);
776 CurrentVRegUses.setUniverse(NumVirtRegs);
777
778 // Model data dependencies between instructions being scheduled and the
779 // ExitSU.
781
782 // Walk the list of instructions, from bottom moving up.
783 MachineInstr *DbgMI = nullptr;
785 MII != MIE; --MII) {
786 MachineInstr &MI = *std::prev(MII);
787 if (DbgMI) {
788 DbgValues.emplace_back(DbgMI, &MI);
789 DbgMI = nullptr;
790 }
791
792 if (MI.isDebugValue() || MI.isDebugPHI()) {
793 DbgMI = &MI;
794 continue;
795 }
796
797 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
798 continue;
799
800 SUnit *SU = MISUnitMap[&MI];
801 assert(SU && "No SUnit mapped to this MI");
802
803 if (RPTracker) {
804 RegisterOperands RegOpers;
805 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
806 if (TrackLaneMasks) {
807 SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
808 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
809 }
810 if (PDiffs != nullptr)
811 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
812
813 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
814 RPTracker->recedeSkipDebugValues();
815 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
816 RPTracker->recede(RegOpers);
817 }
818
819 assert(
820 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
821 "Cannot schedule terminators or labels!");
822
823 // Add register-based dependencies (data, anti, and output).
824 // For some instructions (calls, returns, inline-asm, etc.) there can
825 // be explicit uses and implicit defs, in which case the use will appear
826 // on the operand list before the def. Do two passes over the operand
827 // list to make sure that defs are processed before any uses.
828 bool HasVRegDef = false;
829 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
830 const MachineOperand &MO = MI.getOperand(j);
831 if (!MO.isReg() || !MO.isDef())
832 continue;
833 Register Reg = MO.getReg();
834 if (Reg.isPhysical()) {
835 addPhysRegDeps(SU, j);
836 } else if (Reg.isVirtual()) {
837 HasVRegDef = true;
838 addVRegDefDeps(SU, j);
839 }
840 }
841 // Now process all uses.
842 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
843 const MachineOperand &MO = MI.getOperand(j);
844 // Only look at use operands.
845 // We do not need to check for MO.readsReg() here because subsequent
846 // subregister defs will get output dependence edges and need no
847 // additional use dependencies.
848 if (!MO.isReg() || !MO.isUse())
849 continue;
850 Register Reg = MO.getReg();
851 if (Reg.isPhysical()) {
852 addPhysRegDeps(SU, j);
853 } else if (Reg.isVirtual() && MO.readsReg()) {
854 addVRegUseDeps(SU, j);
855 }
856 }
857
858 // If we haven't seen any uses in this scheduling region, create a
859 // dependence edge to ExitSU to model the live-out latency. This is required
860 // for vreg defs with no in-region use, and prefetches with no vreg def.
861 //
862 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
863 // check currently relies on being called before adding chain deps.
864 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
865 SDep Dep(SU, SDep::Artificial);
866 Dep.setLatency(SU->Latency - 1);
867 ExitSU.addPred(Dep);
868 }
869
870 // Add memory dependencies (Note: isStoreToStackSlot and
871 // isLoadFromStackSLot are not usable after stack slots are lowered to
872 // actual addresses).
873
874 const TargetInstrInfo *TII = ST.getInstrInfo();
875 // This is a barrier event that acts as a pivotal node in the DAG.
876 if (TII->isGlobalMemoryObject(&MI)) {
877
878 // Become the barrier chain.
879 if (BarrierChain)
880 BarrierChain->addPredBarrier(SU);
881 BarrierChain = SU;
882
883 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
884 << BarrierChain->NodeNum << ").\n");
885
886 // Add dependencies against everything below it and clear maps.
887 addBarrierChain(Stores);
888 addBarrierChain(Loads);
889 addBarrierChain(NonAliasStores);
890 addBarrierChain(NonAliasLoads);
891 addBarrierChain(FPExceptions);
892
893 continue;
894 }
895
896 // Instructions that may raise FP exceptions may not be moved
897 // across any global barriers.
898 if (MI.mayRaiseFPException()) {
899 if (BarrierChain)
900 BarrierChain->addPredBarrier(SU);
901
902 if (FPExceptions.size() + 1 >= HugeRegion) {
904 dbgs()
905 << "Creating barrier chain and clearing FPExceptions map.\n");
906 BarrierChain = SU;
907 addBarrierChain(FPExceptions);
908 } else {
909 FPExceptions.insert(SU, UnknownValue);
910 }
911 }
912
913 // If it's not a store or a variant load, we're done.
914 if (!MI.mayStore() &&
915 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
916 continue;
917
919
920 // Always add dependecy edge to BarrierChain if present.
921 if (BarrierChain)
922 BarrierChain->addPredBarrier(SU);
923
924 // Reduce maps if they grow huge.
926 LLVM_DEBUG(dbgs() << "Creating barrier chain and clearing maps.\n");
927
928 BarrierChain = SU;
929
930 addBarrierChain(Stores);
931 addBarrierChain(Loads);
932 addBarrierChain(NonAliasStores);
933 addBarrierChain(NonAliasLoads);
934
935 MemOpsProcessed = 0;
936 continue;
937 }
938
939 // Find the underlying objects for MI. The Objs vector is either
940 // empty, or filled with the Values of memory locations which this
941 // SU depends on.
943 bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
944 MF.getDataLayout());
945
946 if (MI.mayStore()) {
947 if (!ObjsFound) {
948 // An unknown store depends on all stores and loads.
949 addChainDependencies(SU, Stores);
950 addChainDependencies(SU, NonAliasStores);
951 addChainDependencies(SU, Loads);
952 addChainDependencies(SU, NonAliasLoads);
953
954 // Map this store to 'UnknownValue'.
955 Stores.insert(SU, UnknownValue);
956 } else {
957 // Add precise dependencies against all previously seen memory
958 // accesses mapped to the same Value(s).
959 for (const UnderlyingObject &UnderlObj : Objs) {
960 ValueType V = UnderlObj.getValue();
961 bool ThisMayAlias = UnderlObj.mayAlias();
962
963 // Add dependencies to previous stores and loads mapped to V.
964 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
965 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
966 }
967 // Update the store map after all chains have been added to avoid adding
968 // self-loop edge if multiple underlying objects are present.
969 for (const UnderlyingObject &UnderlObj : Objs) {
970 ValueType V = UnderlObj.getValue();
971 bool ThisMayAlias = UnderlObj.mayAlias();
972
973 // Map this store to V.
974 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
975 }
976 // The store may have dependencies to unanalyzable loads and
977 // stores.
980 }
981 } else { // SU is a load.
982 if (!ObjsFound) {
983 // An unknown load depends on all stores.
984 addChainDependencies(SU, Stores);
985 addChainDependencies(SU, NonAliasStores);
986
987 Loads.insert(SU, UnknownValue);
988 } else {
989 for (const UnderlyingObject &UnderlObj : Objs) {
990 ValueType V = UnderlObj.getValue();
991 bool ThisMayAlias = UnderlObj.mayAlias();
992
993 // Add precise dependencies against all previously seen stores
994 // mapping to the same Value(s).
995 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
996
997 // Map this load to V.
998 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
999 }
1000 // The load may have dependencies to unanalyzable stores.
1002 }
1003 }
1004 }
1005
1006 if (DbgMI)
1007 FirstDbgValue = DbgMI;
1008
1009 Defs.clear();
1010 Uses.clear();
1011 CurrentVRegDefs.clear();
1012 CurrentVRegUses.clear();
1013
1014 Topo.MarkDirty();
1015}
1016
1018 PSV->printCustom(OS);
1019 return OS;
1020}
1021
1023 for (const auto &[ValType, SUs] : *this) {
1024 if (isa<const Value *>(ValType)) {
1025 const Value *V = cast<const Value *>(ValType);
1026 if (isa<UndefValue>(V))
1027 dbgs() << "Unknown";
1028 else
1029 V->printAsOperand(dbgs());
1030 } else if (isa<const PseudoSourceValue *>(ValType))
1032 else
1033 llvm_unreachable("Unknown Value type.");
1034
1035 dbgs() << " : ";
1036 dumpSUList(SUs);
1037 }
1038}
1039
1041 MachineInstr &MI, bool addToLiveRegs) {
1042 for (MachineOperand &MO : MI.operands()) {
1043 if (!MO.isReg() || !MO.readsReg())
1044 continue;
1045 Register Reg = MO.getReg();
1046 if (!Reg)
1047 continue;
1048
1049 // Things that are available after the instruction are killed by it.
1050 bool IsKill = LiveRegs.available(Reg);
1051
1052 // Exception: Do not kill reserved registers
1053 MO.setIsKill(IsKill && !MRI.isReserved(Reg));
1054 if (addToLiveRegs)
1055 LiveRegs.addReg(Reg);
1056 }
1057}
1058
1060 LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1061
1062 LiveRegs.init(*TRI);
1063 LiveRegs.addLiveOuts(MBB);
1064
1065 // Examine block from end to start...
1066 for (MachineInstr &MI : llvm::reverse(MBB)) {
1067 if (MI.isDebugOrPseudoInstr())
1068 continue;
1069
1070 // Update liveness. Registers that are defed but not used in this
1071 // instruction are now dead. Mark register and all subregs as they
1072 // are completely defined.
1073 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1074 const MachineOperand &MO = *O;
1075 if (MO.isReg()) {
1076 if (!MO.isDef())
1077 continue;
1078 Register Reg = MO.getReg();
1079 if (!Reg)
1080 continue;
1081 LiveRegs.removeReg(Reg);
1082 } else if (MO.isRegMask()) {
1083 LiveRegs.removeRegsNotPreserved(MO.getRegMask());
1084 }
1085 }
1086
1087 // If there is a bundle header fix it up first.
1088 if (!MI.isBundled()) {
1089 toggleKills(MRI, LiveRegs, MI, true);
1090 } else {
1091 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1092 if (MI.isBundle())
1093 toggleKills(MRI, LiveRegs, MI, false);
1094
1095 // Some targets make the (questionable) assumtion that the instructions
1096 // inside the bundle are ordered and consequently only the last use of
1097 // a register inside the bundle can kill it.
1098 MachineBasicBlock::instr_iterator I = std::next(Bundle);
1099 while (I->isBundledWithSucc())
1100 ++I;
1101 do {
1102 if (!I->isDebugOrPseudoInstr())
1103 toggleKills(MRI, LiveRegs, *I, true);
1104 --I;
1105 } while (I != Bundle);
1106 }
1107 }
1108}
1109
1110void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1112 dumpNodeName(SU);
1113 if (SchedPrintCycles)
1114 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1115 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1116 dbgs() << ": ";
1117 SU.getInstr()->dump();
1118#endif
1119}
1120
1122#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1123 if (EntrySU.getInstr() != nullptr)
1125 for (const SUnit &SU : SUnits)
1126 dumpNodeAll(SU);
1127 if (ExitSU.getInstr() != nullptr)
1129#endif
1130}
1131
1132std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1133 std::string s;
1134 raw_string_ostream oss(s);
1135 if (SU == &EntrySU)
1136 oss << "<entry>";
1137 else if (SU == &ExitSU)
1138 oss << "<exit>";
1139 else
1140 SU->getInstr()->print(oss, /*IsStandalone=*/true);
1141 return s;
1142}
1143
1144/// Return the basic block label. It is not necessarily unique because a block
1145/// contains multiple scheduling regions. But it is fine for visualization.
1147 return "dag." + BB->getFullName();
1148}
1149
1151 return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1152}
1153
1154bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1155 if (SuccSU != &ExitSU) {
1156 // Do not use WillCreateCycle, it assumes SD scheduling.
1157 // If Pred is reachable from Succ, then the edge creates a cycle.
1158 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1159 return false;
1160 Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1161 }
1162 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1163 // Return true regardless of whether a new edge needed to be inserted.
1164 return true;
1165}
1166
1167//===----------------------------------------------------------------------===//
1168// SchedDFSResult Implementation
1169//===----------------------------------------------------------------------===//
1170
1171namespace llvm {
1172
1173/// Internal state used to compute SchedDFSResult.
1175 SchedDFSResult &R;
1176
1177 /// Join DAG nodes into equivalence classes by their subtree.
1178 IntEqClasses SubtreeClasses;
1179 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1180 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1181
1182 struct RootData {
1183 unsigned NodeID;
1184 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1185 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1186 /// children.
1187
1188 RootData(unsigned id): NodeID(id),
1189 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1190
1191 unsigned getSparseSetIndex() const { return NodeID; }
1192 };
1193
1194 SparseSet<RootData> RootSet;
1195
1196public:
1197 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1198 RootSet.setUniverse(R.DFSNodeData.size());
1199 }
1200
1201 /// Returns true if this node been visited by the DFS traversal.
1202 ///
1203 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1204 /// ID. Later, SubtreeID is updated but remains valid.
1205 bool isVisited(const SUnit *SU) const {
1206 return R.DFSNodeData[SU->NodeNum].SubtreeID
1207 != SchedDFSResult::InvalidSubtreeID;
1208 }
1209
1210 /// Initializes this node's instruction count. We don't need to flag the node
1211 /// visited until visitPostorder because the DAG cannot have cycles.
1212 void visitPreorder(const SUnit *SU) {
1213 R.DFSNodeData[SU->NodeNum].InstrCount =
1214 SU->getInstr()->isTransient() ? 0 : 1;
1215 }
1216
1217 /// Called once for each node after all predecessors are visited. Revisit this
1218 /// node's predecessors and potentially join them now that we know the ILP of
1219 /// the other predecessors.
1220 void visitPostorderNode(const SUnit *SU) {
1221 // Mark this node as the root of a subtree. It may be joined with its
1222 // successors later.
1223 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1224 RootData RData(SU->NodeNum);
1225 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1226
1227 // If any predecessors are still in their own subtree, they either cannot be
1228 // joined or are large enough to remain separate. If this parent node's
1229 // total instruction count is not greater than a child subtree by at least
1230 // the subtree limit, then try to join it now since splitting subtrees is
1231 // only useful if multiple high-pressure paths are possible.
1232 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1233 for (const SDep &PredDep : SU->Preds) {
1234 if (PredDep.getKind() != SDep::Data)
1235 continue;
1236 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1237 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1238 joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1239
1240 // Either link or merge the TreeData entry from the child to the parent.
1241 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1242 // If the predecessor's parent is invalid, this is a tree edge and the
1243 // current node is the parent.
1244 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1245 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1246 }
1247 else if (RootSet.count(PredNum)) {
1248 // The predecessor is not a root, but is still in the root set. This
1249 // must be the new parent that it was just joined to. Note that
1250 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1251 // set to the original parent.
1252 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1253 RootSet.erase(PredNum);
1254 }
1255 }
1256 RootSet[SU->NodeNum] = RData;
1257 }
1258
1259 /// Called once for each tree edge after calling visitPostOrderNode on
1260 /// the predecessor. Increment the parent node's instruction count and
1261 /// preemptively join this subtree to its parent's if it is small enough.
1262 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1263 R.DFSNodeData[Succ->NodeNum].InstrCount
1264 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1265 joinPredSubtree(PredDep, Succ);
1266 }
1267
1268 /// Adds a connection for cross edges.
1269 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1270 ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
1271 }
1272
1273 /// Sets each node's subtree ID to the representative ID and record
1274 /// connections between trees.
1275 void finalize() {
1276 SubtreeClasses.compress();
1277 R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1278 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1279 && "number of roots should match trees");
1280 for (const RootData &Root : RootSet) {
1281 unsigned TreeID = SubtreeClasses[Root.NodeID];
1282 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1283 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1284 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1285 // Note that SubInstrCount may be greater than InstrCount if we joined
1286 // subtrees across a cross edge. InstrCount will be attributed to the
1287 // original parent, while SubInstrCount will be attributed to the joined
1288 // parent.
1289 }
1290 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1291 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1292 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1293 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1294 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1295 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1296 << R.DFSNodeData[Idx].SubtreeID << '\n');
1297 }
1298 for (const auto &[Pred, Succ] : ConnectionPairs) {
1299 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1300 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1301 if (PredTree == SuccTree)
1302 continue;
1303 unsigned Depth = Pred->getDepth();
1304 addConnection(PredTree, SuccTree, Depth);
1305 addConnection(SuccTree, PredTree, Depth);
1306 }
1307 }
1308
1309protected:
1310 /// Joins the predecessor subtree with the successor that is its DFS parent.
1311 /// Applies some heuristics before joining.
1312 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1313 bool CheckLimit = true) {
1314 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1315
1316 // Check if the predecessor is already joined.
1317 const SUnit *PredSU = PredDep.getSUnit();
1318 unsigned PredNum = PredSU->NodeNum;
1319 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1320 return false;
1321
1322 // Four is the magic number of successors before a node is considered a
1323 // pinch point.
1324 unsigned NumDataSucs = 0;
1325 for (const SDep &SuccDep : PredSU->Succs) {
1326 if (SuccDep.getKind() == SDep::Data) {
1327 if (++NumDataSucs >= 4)
1328 return false;
1329 }
1330 }
1331 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1332 return false;
1333 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1334 SubtreeClasses.join(Succ->NodeNum, PredNum);
1335 return true;
1336 }
1337
1338 /// Called by finalize() to record a connection between trees.
1339 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1340 if (!Depth)
1341 return;
1342
1343 do {
1345 R.SubtreeConnections[FromTree];
1346 for (SchedDFSResult::Connection &C : Connections) {
1347 if (C.TreeID == ToTree) {
1348 C.Level = std::max(C.Level, Depth);
1349 return;
1350 }
1351 }
1352 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1353 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1354 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1355 }
1356};
1357
1358} // end namespace llvm
1359
1360namespace {
1361
1362/// Manage the stack used by a reverse depth-first search over the DAG.
1363class SchedDAGReverseDFS {
1364 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1365
1366public:
1367 bool isComplete() const { return DFSStack.empty(); }
1368
1369 void follow(const SUnit *SU) {
1370 DFSStack.emplace_back(SU, SU->Preds.begin());
1371 }
1372 void advance() { ++DFSStack.back().second; }
1373
1374 const SDep *backtrack() {
1375 DFSStack.pop_back();
1376 return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1377 }
1378
1379 const SUnit *getCurr() const { return DFSStack.back().first; }
1380
1381 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1382
1383 SUnit::const_pred_iterator getPredEnd() const {
1384 return getCurr()->Preds.end();
1385 }
1386};
1387
1388} // end anonymous namespace
1389
1390static bool hasDataSucc(const SUnit *SU) {
1391 for (const SDep &SuccDep : SU->Succs) {
1392 if (SuccDep.getKind() == SDep::Data &&
1393 !SuccDep.getSUnit()->isBoundaryNode())
1394 return true;
1395 }
1396 return false;
1397}
1398
1399/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1400/// search from this root.
1402 if (!IsBottomUp)
1403 llvm_unreachable("Top-down ILP metric is unimplemented");
1404
1405 SchedDFSImpl Impl(*this);
1406 for (const SUnit &SU : SUnits) {
1407 if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1408 continue;
1409
1410 SchedDAGReverseDFS DFS;
1411 Impl.visitPreorder(&SU);
1412 DFS.follow(&SU);
1413 while (true) {
1414 // Traverse the leftmost path as far as possible.
1415 while (DFS.getPred() != DFS.getPredEnd()) {
1416 const SDep &PredDep = *DFS.getPred();
1417 DFS.advance();
1418 // Ignore non-data edges.
1419 if (PredDep.getKind() != SDep::Data
1420 || PredDep.getSUnit()->isBoundaryNode()) {
1421 continue;
1422 }
1423 // An already visited edge is a cross edge, assuming an acyclic DAG.
1424 if (Impl.isVisited(PredDep.getSUnit())) {
1425 Impl.visitCrossEdge(PredDep, DFS.getCurr());
1426 continue;
1427 }
1428 Impl.visitPreorder(PredDep.getSUnit());
1429 DFS.follow(PredDep.getSUnit());
1430 }
1431 // Visit the top of the stack in postorder and backtrack.
1432 const SUnit *Child = DFS.getCurr();
1433 const SDep *PredDep = DFS.backtrack();
1434 Impl.visitPostorderNode(Child);
1435 if (PredDep)
1436 Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1437 if (DFS.isComplete())
1438 break;
1439 }
1440 }
1441 Impl.finalize();
1442}
1443
1444/// The root of the given SubtreeID was just scheduled. For all subtrees
1445/// connected to this tree, record the depth of the connection so that the
1446/// nearest connected subtrees can be prioritized.
1447void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1448 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1449 SubtreeConnectLevels[C.TreeID] =
1450 std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1451 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1452 << SubtreeConnectLevels[C.TreeID] << '\n');
1453 }
1454}
1455
1456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1458 OS << InstrCount << " / " << Length << " = ";
1459 if (!Length)
1460 OS << "BADILP";
1461 else
1462 OS << format("%g", ((double)InstrCount / Length));
1463}
1464
1466 dbgs() << *this << '\n';
1467}
1468
1469[[maybe_unused]]
1471 Val.print(OS);
1472 return OS;
1473}
1474
1475#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
static Register UseReg(const MachineOperand &MO)
IRTranslator LLVM IR MI
Equivalence classes for small integers.
A common definition of LaneBitmask for use in TableGen and CodeGen.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
#define P(N)
static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
static bool hasDataSucc(const SUnit *SU)
static cl::opt< bool > EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup"))
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::desc("Enable use of AA during MI DAG construction"))
static void dumpSUList(const ScheduleDAGInstrs::SUList &L)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(500), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
static cl::opt< bool > EnableSchedItins("scheditins", cl::Hidden, cl::init(true), cl::desc("Use InstrItineraryData for latency lookup"))
static cl::opt< bool > SchedPrintCycles("sched-print-cycles", cl::Hidden, cl::init(false), cl::desc("Report top/bottom cycles when dumping SUnit instances"))
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
#define LLVM_DEBUG(...)
Definition Debug.h:119
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
void reComputeSize()
Counts the number of SUs in this map after a reduction.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
void clear()
Clears map from all contents.
void clearList(ValueType V)
Clears the list of SUs mapped to V.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
A set of register units used to track register liveness.
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
bool hasImplicitUseOfPhysReg(MCRegister Reg) const
Return true if this instruction implicitly uses the specified physical register.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasTailCall() const
Returns true if the function contains a tail call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
bool isCall(QueryType Type=AnyInBundle) const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator end()
Definition MapVector.h:69
ValueT & operator[](const KeyT &Key)
Definition MapVector.h:100
Array of PressureDiffs.
LLVM_ABI void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
LLVM_ABI void init(unsigned N)
Initialize an array of N PressureDiffs.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
Definition ScheduleDAG.h:54
@ Output
A register output-dependence (aka WAW).
Definition ScheduleDAG.h:57
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
Definition ScheduleDAG.h:72
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned NumSuccs
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
bool isCommutable
Is a commutable instruction.
bool hasPhysRegUses
Has physreg uses.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void finalize()
Sets each node's subtree ID to the representative ID and record connections between trees.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool isVisited(const SUnit *SU) const
Returns true if this node been visited by the DFS traversal.
SchedDFSImpl(SchedDFSResult &r)
Compute the values of each DAG node for various metrics during DFS.
Definition ScheduleDFS.h:65
friend class SchedDFSImpl
Definition ScheduleDFS.h:66
LLVM_ABI void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
LLVM_ABI void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
LiveRegUnits LiveRegs
Set of live physical registers for updating kill flags.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and return a ptr to it.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
BatchAAResults * getAAForDep() const
Returns a (possibly null) pointer to the current BatchAAResults.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
RegUnit2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
std::optional< BatchAAResults > AAForDep
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the depende...
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
const MachineFrameInfo & MFI
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
MachineRegisterInfo & MRI
Virtual/real register map.
void clearDAG()
Clears the DAG state (between regions).
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
ScheduleDAG(const ScheduleDAG &)=delete
void dumpNodeAll(const SUnit &SU) const
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition SparseSet.h:117
TargetInstrInfo - Interface to description of machine instruction set.
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
'undef' values are things that do not have specified contents.
Definition Constants.h:1625
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
SmallVector< UnderlyingObject, 4 > UnderlyingObjectsVector
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Represent the ILP of the subDAG rooted at a DAG node.
Definition ScheduleDFS.h:34
unsigned Length
Length may either correspond to depth or height, depending on direction, and cycles or nodes dependin...
Definition ScheduleDFS.h:38
LLVM_ABI void dump() const
LLVM_ABI void print(raw_ostream &OS) const
unsigned InstrCount
Definition ScheduleDFS.h:35
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
constexpr bool any() const
Definition LaneBitmask.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:74
Record a physical register access.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
Mapping from virtual register to SUnit including an operand index.
An individual mapping from virtual register number to SUnit.