LLVM 23.0.0git
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass identifies loops where we can generate the Hexagon hardware
10// loop instruction. The hardware loop can perform loop branches with a
11// zero-cycle overhead.
12//
13// The pattern that defines the induction variable can changed depending on
14// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15// normalizes induction variables, and the Loop Strength Reduction pass
16// run by 'llc' may also make changes to the induction variable.
17// The pattern detected by this phase is due to running Strength Reduction.
18//
19// Criteria for hardware loops:
20// - Countable loops (w/ ind. var for a trip count)
21// - Assumes loops are normalized by IndVarSimplify
22// - Try inner-most loops first
23// - No function calls in loops.
24//
25//===----------------------------------------------------------------------===//
26
27#include "Hexagon.h"
28#include "HexagonInstrInfo.h"
29#include "HexagonSubtarget.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
47#include "llvm/IR/DebugLoc.h"
49#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <cstdint>
57#include <cstdlib>
58#include <iterator>
59#include <map>
60#include <set>
61#include <string>
62#include <utility>
63#include <vector>
64
65using namespace llvm;
66
67#define DEBUG_TYPE "hwloops"
68
69#ifndef NDEBUG
70static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
71
72// Option to create preheader only for a specific function.
73static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
74 cl::init(""));
75#endif
76
77// Option to create a preheader if one doesn't exist.
78static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
79 cl::Hidden, cl::init(true),
80 cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
81
82// Turn it off by default. If a preheader block is not created here, the
83// software pipeliner may be unable to find a block suitable to serve as
84// a preheader. In that case SWP will not run.
85static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
86 cl::desc("Allow speculation of preheader "
87 "instructions"));
88
89STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
90
91namespace {
92
93 class CountValue;
94
95 struct HexagonHardwareLoops : public MachineFunctionPass {
96 MachineLoopInfo *MLI;
99 const HexagonInstrInfo *TII;
102#ifndef NDEBUG
103 static int Counter;
104#endif
105
106 public:
107 static char ID;
108
109 HexagonHardwareLoops() : MachineFunctionPass(ID) {}
110
111 bool runOnMachineFunction(MachineFunction &MF) override;
112
113 StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
114
115 void getAnalysisUsage(AnalysisUsage &AU) const override {
116 AU.addRequired<MachineDominatorTreeWrapperPass>();
117 AU.addRequired<MachineLoopInfoWrapperPass>();
118 AU.addRequired<MachineOptimizationRemarkEmitterPass>();
120 }
121
122 private:
123 using LoopFeederMap = std::map<Register, MachineInstr *>;
124
125 /// Kinds of comparisons in the compare instructions.
126 struct Comparison {
127 enum Kind {
128 EQ = 0x01,
129 NE = 0x02,
130 L = 0x04,
131 G = 0x08,
132 U = 0x40,
133 LTs = L,
134 LEs = L | EQ,
135 GTs = G,
136 GEs = G | EQ,
137 LTu = L | U,
138 LEu = L | EQ | U,
139 GTu = G | U,
140 GEu = G | EQ | U
141 };
142
143 static Kind getSwappedComparison(Kind Cmp) {
144 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
145 if ((Cmp & L) || (Cmp & G))
146 return (Kind)(Cmp ^ (L|G));
147 return Cmp;
148 }
149
150 static Kind getNegatedComparison(Kind Cmp) {
151 if ((Cmp & L) || (Cmp & G))
152 return (Kind)((Cmp ^ (L | G)) ^ EQ);
153 if ((Cmp & NE) || (Cmp & EQ))
154 return (Kind)(Cmp ^ (EQ | NE));
155 return (Kind)0;
156 }
157
158 static bool isSigned(Kind Cmp) {
159 return (Cmp & (L | G) && !(Cmp & U));
160 }
161
162 static bool isUnsigned(Kind Cmp) {
163 return (Cmp & U);
164 }
165 };
166
167 /// Find the register that contains the loop controlling
168 /// induction variable.
169 /// If successful, it will return true and set the \p Reg, \p IVBump
170 /// and \p IVOp arguments. Otherwise it will return false.
171 /// The returned induction register is the register R that follows the
172 /// following induction pattern:
173 /// loop:
174 /// R = phi ..., [ R.next, LatchBlock ]
175 /// R.next = R + #bump
176 /// if (R.next < #N) goto loop
177 /// IVBump is the immediate value added to R, and IVOp is the instruction
178 /// "R.next = R + #bump".
179 bool findInductionRegister(MachineLoop *L, Register &Reg,
180 int64_t &IVBump, MachineInstr *&IVOp) const;
181
182 /// Return the comparison kind for the specified opcode.
183 Comparison::Kind getComparisonKind(unsigned CondOpc,
184 MachineOperand *InitialValue,
185 const MachineOperand *Endvalue,
186 int64_t IVBump) const;
187
188 /// Analyze the statements in a loop to determine if the loop
189 /// has a computable trip count and, if so, return a value that represents
190 /// the trip count expression.
191 CountValue *getLoopTripCount(MachineLoop *L,
192 SmallVectorImpl<MachineInstr *> &OldInsts);
193
194 /// Return the expression that represents the number of times
195 /// a loop iterates. The function takes the operands that represent the
196 /// loop start value, loop end value, and induction value. Based upon
197 /// these operands, the function attempts to compute the trip count.
198 /// If the trip count is not directly available (as an immediate value,
199 /// or a register), the function will attempt to insert computation of it
200 /// to the loop's preheader.
201 CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
202 const MachineOperand *End, Register IVReg,
203 int64_t IVBump, Comparison::Kind Cmp) const;
204
205 /// Return true if the instruction is not valid within a hardware
206 /// loop.
207 bool isInvalidLoopOperation(const MachineInstr *MI,
208 bool IsInnerHWLoop) const;
209
210 /// Return true if the loop contains an instruction that inhibits
211 /// using the hardware loop.
212 bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
213
214 /// Given a loop, check if we can convert it to a hardware loop.
215 /// If so, then perform the conversion and return true.
216 bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
217
218 /// Return true if the instruction is now dead.
219 bool isDead(const MachineInstr *MI,
220 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
221
222 /// Remove the instruction if it is now dead.
223 void removeIfDead(MachineInstr *MI);
224
225 /// Make sure that the "bump" instruction executes before the
226 /// compare. We need that for the IV fixup, so that the compare
227 /// instruction would not use a bumped value that has not yet been
228 /// defined. If the instructions are out of order, try to reorder them.
229 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
230
231 /// Return true if MO and MI pair is visited only once. If visited
232 /// more than once, this indicates there is recursion. In such a case,
233 /// return false.
234 bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
235 const MachineOperand *MO,
236 LoopFeederMap &LoopFeederPhi) const;
237
238 /// Return true if the Phi may generate a value that may underflow,
239 /// or may wrap.
240 bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
241 MachineBasicBlock *MBB, MachineLoop *L,
242 LoopFeederMap &LoopFeederPhi) const;
243
244 /// Return true if the induction variable may underflow an unsigned
245 /// value in the first iteration.
246 bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
247 const MachineOperand *EndVal,
248 MachineBasicBlock *MBB, MachineLoop *L,
249 LoopFeederMap &LoopFeederPhi) const;
250
251 /// Check if the given operand has a compile-time known constant
252 /// value. Return true if yes, and false otherwise. When returning true, set
253 /// Val to the corresponding constant value.
254 bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
255
256 /// Check if the operand has a compile-time known constant value.
257 bool isImmediate(const MachineOperand &MO) const {
258 int64_t V;
259 return checkForImmediate(MO, V);
260 }
261
262 /// Return the immediate for the specified operand.
263 int64_t getImmediate(const MachineOperand &MO) const {
264 int64_t V;
265 if (!checkForImmediate(MO, V))
266 llvm_unreachable("Invalid operand");
267 return V;
268 }
269
270 /// Reset the given machine operand to now refer to a new immediate
271 /// value. Assumes that the operand was already referencing an immediate
272 /// value, either directly, or via a register.
273 void setImmediate(MachineOperand &MO, int64_t Val);
274
275 /// Fix the data flow of the induction variable.
276 /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
277 /// |
278 /// +-> back to phi
279 /// where "bump" is the increment of the induction variable:
280 /// iv = iv + #const.
281 /// Due to some prior code transformations, the actual flow may look
282 /// like this:
283 /// phi -+-> bump ---> back to phi
284 /// |
285 /// +-> comparison-in-latch (against upper_bound-bump),
286 /// i.e. the comparison that controls the loop execution may be using
287 /// the value of the induction variable from before the increment.
288 ///
289 /// Return true if the loop's flow is the desired one (i.e. it's
290 /// either been fixed, or no fixing was necessary).
291 /// Otherwise, return false. This can happen if the induction variable
292 /// couldn't be identified, or if the value in the latch's comparison
293 /// cannot be adjusted to reflect the post-bump value.
294 bool fixupInductionVariable(MachineLoop *L);
295
296 /// Given a loop, if it does not have a preheader, create one.
297 /// Return the block that is the preheader.
298 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
299 };
300
301 char HexagonHardwareLoops::ID = 0;
302#ifndef NDEBUG
303 int HexagonHardwareLoops::Counter = 0;
304#endif
305
306 /// Abstraction for a trip count of a loop. A smaller version
307 /// of the MachineOperand class without the concerns of changing the
308 /// operand representation.
309 class CountValue {
310 public:
311 enum CountValueType {
312 CV_Register,
313 CV_Immediate
314 };
315
316 private:
317 CountValueType Kind;
318 union Values {
319 Values() : R{Register(), 0} {}
320 Values(const Values&) = default;
321 struct {
323 unsigned Sub;
324 } R;
325 unsigned ImmVal;
326 } Contents;
327
328 public:
329 explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
330 Kind = t;
331 if (Kind == CV_Register) {
332 Contents.R.Reg = v;
333 Contents.R.Sub = u;
334 } else {
335 Contents.ImmVal = v;
336 }
337 }
338
339 bool isReg() const { return Kind == CV_Register; }
340 bool isImm() const { return Kind == CV_Immediate; }
341
342 Register getReg() const {
343 assert(isReg() && "Wrong CountValue accessor");
344 return Contents.R.Reg;
345 }
346
347 unsigned getSubReg() const {
348 assert(isReg() && "Wrong CountValue accessor");
349 return Contents.R.Sub;
350 }
351
352 unsigned getImm() const {
353 assert(isImm() && "Wrong CountValue accessor");
354 return Contents.ImmVal;
355 }
356
357 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
358 if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
359 if (isImm()) { OS << Contents.ImmVal; }
360 }
361 };
362
363} // end anonymous namespace
364
365INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
366 "Hexagon Hardware Loops", false, false)
369INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
370 "Hexagon Hardware Loops", false, false)
371
373 return new HexagonHardwareLoops();
374}
375
376bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
377 LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
378 if (skipFunction(MF.getFunction()))
379 return false;
380
381 bool Changed = false;
382
383 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
384 MRI = &MF.getRegInfo();
385 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
387 TII = HST.getInstrInfo();
388 TRI = HST.getRegisterInfo();
389
390 MORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
391
392 for (auto &L : *MLI)
393 if (L->isOutermost()) {
394 bool L0Used = false;
395 bool L1Used = false;
396 Changed |= convertToHardwareLoop(L, L0Used, L1Used);
397 }
398
399 return Changed;
400}
401
402bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
403 Register &Reg,
404 int64_t &IVBump,
405 MachineInstr *&IVOp
406 ) const {
407 MachineBasicBlock *Header = L->getHeader();
408 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
409 MachineBasicBlock *Latch = L->getLoopLatch();
410 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
411 if (!Header || !Preheader || !Latch || !ExitingBlock)
412 return false;
413
414 // This pair represents an induction register together with an immediate
415 // value that will be added to it in each loop iteration.
416 using RegisterBump = std::pair<Register, int64_t>;
417
418 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
419 // from an induction operation
420 // R.next = R + bump
421 // where bump is an immediate value.
422 using InductionMap = std::map<Register, RegisterBump>;
423
424 InductionMap IndMap;
425
426 using instr_iterator = MachineBasicBlock::instr_iterator;
427
428 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
429 I != E && I->isPHI(); ++I) {
430 MachineInstr *Phi = &*I;
431
432 // Have a PHI instruction. Get the operand that corresponds to the
433 // latch block, and see if is a result of an addition of form "reg+imm",
434 // where the "reg" is defined by the PHI node we are looking at.
435 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
436 if (Phi->getOperand(i+1).getMBB() != Latch)
437 continue;
438
439 Register PhiOpReg = Phi->getOperand(i).getReg();
440 MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
441
442 if (DI->getDesc().isAdd()) {
443 // If the register operand to the add is the PHI we're looking at, this
444 // meets the induction pattern.
445 Register IndReg = DI->getOperand(1).getReg();
446 MachineOperand &Opnd2 = DI->getOperand(2);
447 int64_t V;
448 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
449 Register UpdReg = DI->getOperand(0).getReg();
450 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
451 }
452 }
453 } // for (i)
454 } // for (instr)
455
457 MachineBasicBlock *TB = nullptr, *FB = nullptr;
458 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
459 if (NotAnalyzed)
460 return false;
461
462 Register PredR;
463 unsigned PredPos;
464 RegState PredRegFlags;
465 if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
466 return false;
467
468 MachineInstr *PredI = MRI->getVRegDef(PredR);
469 if (!PredI->isCompare())
470 return false;
471
472 Register CmpReg1, CmpReg2;
473 int64_t CmpImm = 0, CmpMask = 0;
474 bool CmpAnalyzed =
475 TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
476 // Fail if the compare was not analyzed, or it's not comparing a register
477 // with an immediate value. Not checking the mask here, since we handle
478 // the individual compare opcodes (including A4_cmpb*) later on.
479 if (!CmpAnalyzed)
480 return false;
481
482 // Exactly one of the input registers to the comparison should be among
483 // the induction registers.
484 InductionMap::iterator IndMapEnd = IndMap.end();
485 InductionMap::iterator F = IndMapEnd;
486 if (CmpReg1 != 0) {
487 InductionMap::iterator F1 = IndMap.find(CmpReg1);
488 if (F1 != IndMapEnd)
489 F = F1;
490 }
491 if (CmpReg2 != 0) {
492 InductionMap::iterator F2 = IndMap.find(CmpReg2);
493 if (F2 != IndMapEnd) {
494 if (F != IndMapEnd)
495 return false;
496 F = F2;
497 }
498 }
499 if (F == IndMapEnd)
500 return false;
501
502 Reg = F->second.first;
503 IVBump = F->second.second;
504 IVOp = MRI->getVRegDef(F->first);
505 return true;
506}
507
508// Return the comparison kind for the specified opcode.
509HexagonHardwareLoops::Comparison::Kind
510HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
511 MachineOperand *InitialValue,
512 const MachineOperand *EndValue,
513 int64_t IVBump) const {
514 Comparison::Kind Cmp = (Comparison::Kind)0;
515 switch (CondOpc) {
516 case Hexagon::C2_cmpeq:
517 case Hexagon::C2_cmpeqi:
518 case Hexagon::C2_cmpeqp:
519 Cmp = Comparison::EQ;
520 break;
521 case Hexagon::C4_cmpneq:
522 case Hexagon::C4_cmpneqi:
523 Cmp = Comparison::NE;
524 break;
525 case Hexagon::C2_cmplt:
526 Cmp = Comparison::LTs;
527 break;
528 case Hexagon::C2_cmpltu:
529 Cmp = Comparison::LTu;
530 break;
531 case Hexagon::C4_cmplte:
532 case Hexagon::C4_cmpltei:
533 Cmp = Comparison::LEs;
534 break;
535 case Hexagon::C4_cmplteu:
536 case Hexagon::C4_cmplteui:
537 Cmp = Comparison::LEu;
538 break;
539 case Hexagon::C2_cmpgt:
540 case Hexagon::C2_cmpgti:
541 case Hexagon::C2_cmpgtp:
542 Cmp = Comparison::GTs;
543 break;
544 case Hexagon::C2_cmpgtu:
545 case Hexagon::C2_cmpgtui:
546 case Hexagon::C2_cmpgtup:
547 Cmp = Comparison::GTu;
548 break;
549 case Hexagon::C2_cmpgei:
550 Cmp = Comparison::GEs;
551 break;
552 case Hexagon::C2_cmpgeui:
553 Cmp = Comparison::GEs;
554 break;
555 default:
556 return (Comparison::Kind)0;
557 }
558 return Cmp;
559}
560
561/// Analyze the statements in a loop to determine if the loop has
562/// a computable trip count and, if so, return a value that represents
563/// the trip count expression.
564///
565/// This function iterates over the phi nodes in the loop to check for
566/// induction variable patterns that are used in the calculation for
567/// the number of time the loop is executed.
568CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
569 SmallVectorImpl<MachineInstr *> &OldInsts) {
570 MachineBasicBlock *TopMBB = L->getTopBlock();
572 assert(PI != TopMBB->pred_end() &&
573 "Loop must have more than one incoming edge!");
574 MachineBasicBlock *Backedge = *PI++;
575 if (PI == TopMBB->pred_end()) // dead loop?
576 return nullptr;
577 MachineBasicBlock *Incoming = *PI++;
578 if (PI != TopMBB->pred_end()) // multiple backedges?
579 return nullptr;
580
581 // Make sure there is one incoming and one backedge and determine which
582 // is which.
583 if (L->contains(Incoming)) {
584 if (L->contains(Backedge))
585 return nullptr;
586 std::swap(Incoming, Backedge);
587 } else if (!L->contains(Backedge))
588 return nullptr;
589
590 // Look for the cmp instruction to determine if we can get a useful trip
591 // count. The trip count can be either a register or an immediate. The
592 // location of the value depends upon the type (reg or imm).
593 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
594 if (!ExitingBlock)
595 return nullptr;
596
597 Register IVReg = 0;
598 int64_t IVBump = 0;
599 MachineInstr *IVOp;
600 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
601 if (!FoundIV)
602 return nullptr;
603
604 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
605
606 MachineOperand *InitialValue = nullptr;
607 MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
608 MachineBasicBlock *Latch = L->getLoopLatch();
609 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
610 MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
611 if (MBB == Preheader)
612 InitialValue = &IV_Phi->getOperand(i);
613 else if (MBB == Latch)
614 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
615 }
616 if (!InitialValue)
617 return nullptr;
618
620 MachineBasicBlock *TB = nullptr, *FB = nullptr;
621 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
622 if (NotAnalyzed)
623 return nullptr;
624
625 MachineBasicBlock *Header = L->getHeader();
626 // TB must be non-null. If FB is also non-null, one of them must be
627 // the header. Otherwise, branch to TB could be exiting the loop, and
628 // the fall through can go to the header.
629 assert (TB && "Exit block without a branch?");
630 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
631 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
633 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
634 if (NotAnalyzed)
635 return nullptr;
636 if (TB == Latch)
637 TB = (LTB == Header) ? LTB : LFB;
638 else
639 FB = (LTB == Header) ? LTB: LFB;
640 }
641 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
642 if (!TB || (FB && TB != Header && FB != Header))
643 return nullptr;
644
645 // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
646 // to put imm(0), followed by P in the vector Cond.
647 // If TB is not the header, it means that the "not-taken" path must lead
648 // to the header.
649 bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
650 Register PredReg;
651 unsigned PredPos;
652 RegState PredRegFlags;
653 if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
654 return nullptr;
655 MachineInstr *CondI = MRI->getVRegDef(PredReg);
656 unsigned CondOpc = CondI->getOpcode();
657
658 Register CmpReg1, CmpReg2;
659 int64_t Mask = 0, ImmValue = 0;
660 bool AnalyzedCmp =
661 TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
662 if (!AnalyzedCmp)
663 return nullptr;
664
665 // The comparison operator type determines how we compute the loop
666 // trip count.
667 OldInsts.push_back(CondI);
668 OldInsts.push_back(IVOp);
669
670 // Sadly, the following code gets information based on the position
671 // of the operands in the compare instruction. This has to be done
672 // this way, because the comparisons check for a specific relationship
673 // between the operands (e.g. is-less-than), rather than to find out
674 // what relationship the operands are in (as on PPC).
675 Comparison::Kind Cmp;
676 bool isSwapped = false;
677 const MachineOperand &Op1 = CondI->getOperand(1);
678 const MachineOperand &Op2 = CondI->getOperand(2);
679 const MachineOperand *EndValue = nullptr;
680
681 if (Op1.isReg()) {
682 if (Op2.isImm() || Op1.getReg() == IVReg)
683 EndValue = &Op2;
684 else {
685 EndValue = &Op1;
686 isSwapped = true;
687 }
688 }
689
690 if (!EndValue)
691 return nullptr;
692
693 Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
694 if (!Cmp)
695 return nullptr;
696 if (Negated)
697 Cmp = Comparison::getNegatedComparison(Cmp);
698 if (isSwapped)
699 Cmp = Comparison::getSwappedComparison(Cmp);
700
701 if (InitialValue->isReg()) {
702 Register R = InitialValue->getReg();
703 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
704 if (!MDT->properlyDominates(DefBB, Header)) {
705 int64_t V;
706 if (!checkForImmediate(*InitialValue, V))
707 return nullptr;
708 }
709 OldInsts.push_back(MRI->getVRegDef(R));
710 }
711 if (EndValue->isReg()) {
712 Register R = EndValue->getReg();
713 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
714 if (!MDT->properlyDominates(DefBB, Header)) {
715 int64_t V;
716 if (!checkForImmediate(*EndValue, V))
717 return nullptr;
718 }
719 OldInsts.push_back(MRI->getVRegDef(R));
720 }
721
722 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
723}
724
725/// Helper function that returns the expression that represents the
726/// number of times a loop iterates. The function takes the operands that
727/// represent the loop start value, loop end value, and induction value.
728/// Based upon these operands, the function attempts to compute the trip count.
729CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
730 const MachineOperand *Start,
731 const MachineOperand *End,
732 Register IVReg,
733 int64_t IVBump,
734 Comparison::Kind Cmp) const {
735 LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n");
736 LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n");
737 LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n");
738 LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n");
739 LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n");
740 // Cannot handle comparison EQ, i.e. while (A == B).
741 if (Cmp == Comparison::EQ)
742 return nullptr;
743
744 // Check if either the start or end values are an assignment of an immediate.
745 // If so, use the immediate value rather than the register.
746 if (Start->isReg()) {
747 const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
748 if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
749 StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
750 Start = &StartValInstr->getOperand(1);
751 }
752 if (End->isReg()) {
753 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
754 if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
755 EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
756 End = &EndValInstr->getOperand(1);
757 }
758
759 if (!Start->isReg() && !Start->isImm())
760 return nullptr;
761 if (!End->isReg() && !End->isImm())
762 return nullptr;
763
764 bool CmpLess = Cmp & Comparison::L;
765 bool CmpGreater = Cmp & Comparison::G;
766 bool CmpHasEqual = Cmp & Comparison::EQ;
767
768 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
769 if (CmpLess && IVBump < 0)
770 // Loop going while iv is "less" with the iv value going down. Must wrap.
771 return nullptr;
772
773 if (CmpGreater && IVBump > 0)
774 // Loop going while iv is "greater" with the iv value going up. Must wrap.
775 return nullptr;
776
777 // Phis that may feed into the loop.
778 LoopFeederMap LoopFeederPhi;
779
780 // Check if the initial value may be zero and can be decremented in the first
781 // iteration. If the value is zero, the endloop instruction will not decrement
782 // the loop counter, so we shouldn't generate a hardware loop in this case.
783 if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
784 LoopFeederPhi))
785 return nullptr;
786
787 if (Start->isImm() && End->isImm()) {
788 // Both, start and end are immediates.
789 int64_t StartV = Start->getImm();
790 int64_t EndV = End->getImm();
791 int64_t Dist = EndV - StartV;
792 if (Dist == 0)
793 return nullptr;
794
795 bool Exact = (Dist % IVBump) == 0;
796
797 if (Cmp == Comparison::NE) {
798 if (!Exact)
799 return nullptr;
800 if ((Dist < 0) ^ (IVBump < 0))
801 return nullptr;
802 }
803
804 // For comparisons that include the final value (i.e. include equality
805 // with the final value), we need to increase the distance by 1.
806 if (CmpHasEqual)
807 Dist = Dist > 0 ? Dist+1 : Dist-1;
808
809 // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
810 // CmpGreater should imply Dist < 0. These conditions could actually
811 // fail, for example, in unreachable code (which may still appear to be
812 // reachable in the CFG).
813 if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
814 return nullptr;
815
816 // "Normalized" distance, i.e. with the bump set to +-1.
817 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
818 : (-Dist + (-IVBump - 1)) / (-IVBump);
819 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
820
821 uint64_t Count = Dist1;
822
823 if (Count > 0xFFFFFFFFULL)
824 return nullptr;
825
826 return new CountValue(CountValue::CV_Immediate, Count);
827 }
828
829 // A general case: Start and End are some values, but the actual
830 // iteration count may not be available. If it is not, insert
831 // a computation of it into the preheader.
832
833 // If the induction variable bump is not a power of 2, quit.
834 // Otherwise we'd need a general integer division.
835 if (!isPowerOf2_64(std::abs(IVBump)))
836 return nullptr;
837
838 MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
839 assert (PH && "Should have a preheader by now");
841 DebugLoc DL;
842 if (InsertPos != PH->end())
843 DL = InsertPos->getDebugLoc();
844
845 // If Start is an immediate and End is a register, the trip count
846 // will be "reg - imm". Hexagon's "subtract immediate" instruction
847 // is actually "reg + -imm".
848
849 // If the loop IV is going downwards, i.e. if the bump is negative,
850 // then the iteration count (computed as End-Start) will need to be
851 // negated. To avoid the negation, just swap Start and End.
852 if (IVBump < 0) {
853 std::swap(Start, End);
854 IVBump = -IVBump;
855 std::swap(CmpLess, CmpGreater);
856 }
857 // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
858 // Signedness, and "including equality" are preserved.
859
860 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
861 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
862
863 int64_t StartV = 0, EndV = 0;
864 if (Start->isImm())
865 StartV = Start->getImm();
866 if (End->isImm())
867 EndV = End->getImm();
868
869 int64_t AdjV = 0;
870 // To compute the iteration count, we would need this computation:
871 // Count = (End - Start + (IVBump-1)) / IVBump
872 // or, when CmpHasEqual:
873 // Count = (End - Start + (IVBump-1)+1) / IVBump
874 // The "IVBump-1" part is the adjustment (AdjV). We can avoid
875 // generating an instruction specifically to add it if we can adjust
876 // the immediate values for Start or End.
877
878 if (CmpHasEqual) {
879 // Need to add 1 to the total iteration count.
880 if (Start->isImm())
881 StartV--;
882 else if (End->isImm())
883 EndV++;
884 else
885 AdjV += 1;
886 }
887
888 if (Cmp != Comparison::NE) {
889 if (Start->isImm())
890 StartV -= (IVBump-1);
891 else if (End->isImm())
892 EndV += (IVBump-1);
893 else
894 AdjV += (IVBump-1);
895 }
896
897 Register R = 0;
898 unsigned SR = 0;
899 if (Start->isReg()) {
900 R = Start->getReg();
901 SR = Start->getSubReg();
902 } else {
903 R = End->getReg();
904 SR = End->getSubReg();
905 }
906 const TargetRegisterClass *RC = MRI->getRegClass(R);
907 // Hardware loops cannot handle 64-bit registers. If it's a double
908 // register, it has to have a subregister.
909 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
910 return nullptr;
911 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
912
913 // Compute DistR (register with the distance between Start and End).
914 Register DistR;
915 unsigned DistSR;
916
917 // Avoid special case, where the start value is an imm(0).
918 if (Start->isImm() && StartV == 0) {
919 DistR = End->getReg();
920 DistSR = End->getSubReg();
921 } else {
922 const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
923 (RegToImm ? TII->get(Hexagon::A2_subri) :
924 TII->get(Hexagon::A2_addi));
925 if (RegToReg || RegToImm) {
926 Register SubR = MRI->createVirtualRegister(IntRC);
927 MachineInstrBuilder SubIB =
928 BuildMI(*PH, InsertPos, DL, SubD, SubR);
929
930 if (RegToReg)
931 SubIB.addReg(End->getReg(), {}, End->getSubReg())
932 .addReg(Start->getReg(), {}, Start->getSubReg());
933 else
934 SubIB.addImm(EndV).addReg(Start->getReg(), {}, Start->getSubReg());
935 DistR = SubR;
936 } else {
937 // If the loop has been unrolled, we should use the original loop count
938 // instead of recalculating the value. This will avoid additional
939 // 'Add' instruction.
940 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
941 if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
942 EndValInstr->getOperand(1).getSubReg() == 0 &&
943 EndValInstr->getOperand(2).getImm() == StartV) {
944 DistR = EndValInstr->getOperand(1).getReg();
945 } else {
946 Register SubR = MRI->createVirtualRegister(IntRC);
947 MachineInstrBuilder SubIB =
948 BuildMI(*PH, InsertPos, DL, SubD, SubR);
949 SubIB.addReg(End->getReg(), {}, End->getSubReg()).addImm(-StartV);
950 DistR = SubR;
951 }
952 }
953 DistSR = 0;
954 }
955
956 // From DistR, compute AdjR (register with the adjusted distance).
957 Register AdjR;
958 unsigned AdjSR;
959
960 if (AdjV == 0) {
961 AdjR = DistR;
962 AdjSR = DistSR;
963 } else {
964 // Generate CountR = ADD DistR, AdjVal
965 Register AddR = MRI->createVirtualRegister(IntRC);
966 MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
967 BuildMI(*PH, InsertPos, DL, AddD, AddR)
968 .addReg(DistR, {}, DistSR)
969 .addImm(AdjV);
970
971 AdjR = AddR;
972 AdjSR = 0;
973 }
974
975 // From AdjR, compute CountR (register with the final count).
976 Register CountR;
977 unsigned CountSR;
978
979 if (IVBump == 1) {
980 CountR = AdjR;
981 CountSR = AdjSR;
982 } else {
983 // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
984 unsigned Shift = Log2_32(IVBump);
985
986 // Generate NormR = LSR DistR, Shift.
987 Register LsrR = MRI->createVirtualRegister(IntRC);
988 const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
989 BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
990 .addReg(AdjR, {}, AdjSR)
991 .addImm(Shift);
992
993 CountR = LsrR;
994 CountSR = 0;
995 }
996
997 const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
998 Register MuxR = CountR;
999 unsigned MuxSR = CountSR;
1000 // For the loop count to be valid unsigned number, CmpLess should imply
1001 // Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the
1002 // check if the initial distance is zero and the comparison is LTu || LTEu.
1003 if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) &&
1004 CmpLess) &&
1005 (CmpLess || CmpGreater)) {
1006 // Generate:
1007 // DistCheck = CMP_GT DistR, 0 --> CmpLess
1008 // DistCheck = CMP_GT DistR, -1 --> CmpGreater
1009 Register DistCheckR = MRI->createVirtualRegister(PredRC);
1010 const MCInstrDesc &DistCheckD = TII->get(Hexagon::C2_cmpgti);
1011 BuildMI(*PH, InsertPos, DL, DistCheckD, DistCheckR)
1012 .addReg(DistR, {}, DistSR)
1013 .addImm((CmpLess) ? 0 : -1);
1014
1015 // Generate:
1016 // MUXR = MUX DistCheck, CountR, 1 --> CmpLess
1017 // MUXR = MUX DistCheck, 1, CountR --> CmpGreater
1018 MuxR = MRI->createVirtualRegister(IntRC);
1019 if (CmpLess) {
1020 const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxir);
1021 BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
1022 .addReg(DistCheckR)
1023 .addReg(CountR, {}, CountSR)
1024 .addImm(1);
1025 } else {
1026 const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxri);
1027 BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
1028 .addReg(DistCheckR)
1029 .addImm(1)
1030 .addReg(CountR, {}, CountSR);
1031 }
1032 MuxSR = 0;
1033 }
1034
1035 return new CountValue(CountValue::CV_Register, MuxR, MuxSR);
1036}
1037
1038/// Return true if the operation is invalid within hardware loop.
1039bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
1040 bool IsInnerHWLoop) const {
1041 // Call is not allowed because the callee may use a hardware loop except for
1042 // the case when the call never returns.
1043 if (MI->getDesc().isCall())
1044 return !TII->doesNotReturn(*MI);
1045
1046 // Check if the instruction defines a hardware loop register.
1047 using namespace Hexagon;
1048
1049 static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1050 static const Register Regs1[] = { LC1, SA1 };
1051 auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1);
1052 for (Register R : CheckRegs)
1053 if (MI->modifiesRegister(R, TRI))
1054 return true;
1055
1056 return false;
1057}
1058
1059/// Return true if the loop contains an instruction that inhibits
1060/// the use of the hardware loop instruction.
1061bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1062 bool IsInnerHWLoop) const {
1063 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1064 << printMBBReference(**L->block_begin()));
1065 for (MachineBasicBlock *MBB : L->getBlocks()) {
1066 for (const MachineInstr &MI : *MBB) {
1067 if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
1068 LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1069 MI.dump(););
1070 return true;
1071 }
1072 }
1073 }
1074 return false;
1075}
1076
1077/// Returns true if the instruction is dead. This was essentially
1078/// copied from DeadMachineInstructionElim::isDead, but with special cases
1079/// for inline asm, physical registers and instructions with side effects
1080/// removed.
1081bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1082 SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1083 // Examine each operand.
1084 for (const MachineOperand &MO : MI->operands()) {
1085 if (!MO.isReg() || !MO.isDef())
1086 continue;
1087
1088 Register Reg = MO.getReg();
1089 if (MRI->use_nodbg_empty(Reg))
1090 continue;
1091
1092 using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1093
1094 // This instruction has users, but if the only user is the phi node for the
1095 // parent block, and the only use of that phi node is this instruction, then
1096 // this instruction is dead: both it (and the phi node) can be removed.
1097 use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1098 use_nodbg_iterator End = MRI->use_nodbg_end();
1099 if (std::next(I) != End || !I->getParent()->isPHI())
1100 return false;
1101
1102 MachineInstr *OnePhi = I->getParent();
1103 for (const MachineOperand &OPO : OnePhi->operands()) {
1104 if (!OPO.isReg() || !OPO.isDef())
1105 continue;
1106
1107 Register OPReg = OPO.getReg();
1108 use_nodbg_iterator nextJ;
1109 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1110 J != End; J = nextJ) {
1111 nextJ = std::next(J);
1112 MachineOperand &Use = *J;
1113 MachineInstr *UseMI = Use.getParent();
1114
1115 // If the phi node has a user that is not MI, bail.
1116 if (MI != UseMI)
1117 return false;
1118 }
1119 }
1120 DeadPhis.push_back(OnePhi);
1121 }
1122
1123 // If there are no defs with uses, the instruction is dead.
1124 return true;
1125}
1126
1127void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1128 // This procedure was essentially copied from DeadMachineInstructionElim.
1129
1131 if (isDead(MI, DeadPhis)) {
1132 LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1133
1134 // It is possible that some DBG_VALUE instructions refer to this
1135 // instruction. Examine each def operand for such references;
1136 // if found, mark the DBG_VALUE as undef (but don't delete it).
1137 for (const MachineOperand &MO : MI->operands()) {
1138 if (!MO.isReg() || !MO.isDef())
1139 continue;
1140 Register Reg = MO.getReg();
1141 // We use make_early_inc_range here because setReg below invalidates the
1142 // iterator.
1143 for (MachineOperand &MO :
1145 MachineInstr *UseMI = MO.getParent();
1146 if (UseMI == MI)
1147 continue;
1148 if (MO.isDebug())
1149 MO.setReg(0U);
1150 }
1151 }
1152
1153 MI->eraseFromParent();
1154 for (unsigned i = 0; i < DeadPhis.size(); ++i)
1155 DeadPhis[i]->eraseFromParent();
1156 }
1157}
1158
1159/// Check if the loop is a candidate for converting to a hardware
1160/// loop. If so, then perform the transformation.
1161///
1162/// This function works on innermost loops first. A loop can be converted
1163/// if it is a counting loop; either a register value or an immediate.
1164///
1165/// The code makes several assumptions about the representation of the loop
1166/// in llvm.
1167bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1168 bool &RecL0used,
1169 bool &RecL1used) {
1170 // This is just to confirm basic correctness.
1171 assert(L->getHeader() && "Loop without a header?");
1172
1173 bool Changed = false;
1174 bool L0Used = false;
1175 bool L1Used = false;
1176
1177 // Process nested loops first.
1178 for (MachineLoop *I : *L) {
1179 Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
1180 L0Used |= RecL0used;
1181 L1Used |= RecL1used;
1182 }
1183
1184 // If a nested loop has been converted, then we can't convert this loop.
1185 if (Changed && L0Used && L1Used)
1186 return Changed;
1187
1188 unsigned LOOP_i;
1189 unsigned LOOP_r;
1190 unsigned ENDLOOP;
1191
1192 // Flag used to track loopN instruction:
1193 // 1 - Hardware loop is being generated for the inner most loop.
1194 // 0 - Hardware loop is being generated for the outer loop.
1195 unsigned IsInnerHWLoop = 1;
1196
1197 if (L0Used) {
1198 LOOP_i = Hexagon::J2_loop1i;
1199 LOOP_r = Hexagon::J2_loop1r;
1200 ENDLOOP = Hexagon::ENDLOOP1;
1201 IsInnerHWLoop = 0;
1202 } else {
1203 LOOP_i = Hexagon::J2_loop0i;
1204 LOOP_r = Hexagon::J2_loop0r;
1205 ENDLOOP = Hexagon::ENDLOOP0;
1206 }
1207
1208#ifndef NDEBUG
1209 // Stop trying after reaching the limit (if any).
1210 int Limit = HWLoopLimit;
1211 if (Limit >= 0) {
1212 if (Counter >= HWLoopLimit)
1213 return false;
1214 Counter++;
1215 }
1216#endif
1217
1218 // Does the loop contain any invalid instructions?
1219 if (containsInvalidInstruction(L, IsInnerHWLoop)) {
1220 MORE->emit([&]() {
1221 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "InvalidInstruction",
1222 L->getStartLoc(), L->getHeader())
1223 << "loop contains an instruction that prevents hardware loop "
1224 "generation (e.g. a call or hardware loop register definition)";
1225 });
1226 return false;
1227 }
1228
1229 MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1230 // Don't generate hw loop if the loop has more than one exit.
1231 if (!LastMBB) {
1232 MORE->emit([&]() {
1233 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "MultipleExits",
1234 L->getStartLoc(), L->getHeader())
1235 << "loop has multiple exits and cannot be converted to a "
1236 "hardware loop";
1237 });
1238 return false;
1239 }
1240
1242 if (LastI == LastMBB->end())
1243 return false;
1244
1245 // Is the induction variable bump feeding the latch condition?
1246 if (!fixupInductionVariable(L)) {
1247 MORE->emit([&]() {
1248 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "InductionVariable",
1249 L->getStartLoc(), L->getHeader())
1250 << "could not identify or fix up the induction variable";
1251 });
1252 return false;
1253 }
1254
1255 // Ensure the loop has a preheader: the loop instruction will be
1256 // placed there.
1257 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1258 if (!Preheader) {
1259 Preheader = createPreheaderForLoop(L);
1260 if (!Preheader)
1261 return false;
1262 }
1263
1264 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1265
1266 SmallVector<MachineInstr*, 2> OldInsts;
1267 // Are we able to determine the trip count for the loop?
1268 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1269 if (!TripCount) {
1270 MORE->emit([&]() {
1271 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "TripCount",
1272 L->getStartLoc(), L->getHeader())
1273 << "trip count of the loop could not be computed";
1274 });
1275 return false;
1276 }
1277
1278 // Is the trip count available in the preheader?
1279 if (TripCount->isReg()) {
1280 // There will be a use of the register inserted into the preheader,
1281 // so make sure that the register is actually defined at that point.
1282 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1283 MachineBasicBlock *BBDef = TCDef->getParent();
1284 if (!MDT->dominates(BBDef, Preheader)) {
1285 MORE->emit([&]() {
1286 return MachineOptimizationRemarkMissed(DEBUG_TYPE,
1287 "TripCountNotDominating",
1288 L->getStartLoc(), L->getHeader())
1289 << "trip count register is not available in the loop preheader";
1290 });
1291 return false;
1292 }
1293 }
1294
1295 // Determine the loop start.
1296 MachineBasicBlock *TopBlock = L->getTopBlock();
1297 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1298 MachineBasicBlock *LoopStart = nullptr;
1299 if (ExitingBlock != L->getLoopLatch()) {
1300 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1302
1303 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1304 return false;
1305
1306 if (L->contains(TB))
1307 LoopStart = TB;
1308 else if (L->contains(FB))
1309 LoopStart = FB;
1310 else
1311 return false;
1312 }
1313 else
1314 LoopStart = TopBlock;
1315
1316 // Convert the loop to a hardware loop.
1317 LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1318 DebugLoc DL;
1319 if (InsertPos != Preheader->end())
1320 DL = InsertPos->getDebugLoc();
1321
1322 if (TripCount->isReg()) {
1323 // Create a copy of the loop count register.
1324 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1325 BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1326 .addReg(TripCount->getReg(), {}, TripCount->getSubReg());
1327 // Add the Loop instruction to the beginning of the loop.
1328 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1329 .addReg(CountReg);
1330 } else {
1331 assert(TripCount->isImm() && "Expecting immediate value for trip count");
1332 // Add the Loop immediate instruction to the beginning of the loop,
1333 // if the immediate fits in the instructions. Otherwise, we need to
1334 // create a new virtual register.
1335 int64_t CountImm = TripCount->getImm();
1336 if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1337 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1338 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1339 .addImm(CountImm);
1340 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1341 .addMBB(LoopStart).addReg(CountReg);
1342 } else
1343 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1344 .addMBB(LoopStart).addImm(CountImm);
1345 }
1346
1347 // Make sure the loop start always has a reference in the CFG.
1348 LoopStart->setMachineBlockAddressTaken();
1349
1350 // Replace the loop branch with an endloop instruction.
1351 DebugLoc LastIDL = LastI->getDebugLoc();
1352 BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1353
1354 // The loop ends with either:
1355 // - a conditional branch followed by an unconditional branch, or
1356 // - a conditional branch to the loop start.
1357 if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1358 LastI->getOpcode() == Hexagon::J2_jumpf) {
1359 // Delete one and change/add an uncond. branch to out of the loop.
1360 MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1361 LastI = LastMBB->erase(LastI);
1362 if (!L->contains(BranchTarget)) {
1363 if (LastI != LastMBB->end())
1364 LastI = LastMBB->erase(LastI);
1366 TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1367 }
1368 } else {
1369 // Conditional branch to loop start; just delete it.
1370 LastMBB->erase(LastI);
1371 }
1372 delete TripCount;
1373
1374 // The induction operation and the comparison may now be
1375 // unneeded. If these are unneeded, then remove them.
1376 for (unsigned i = 0; i < OldInsts.size(); ++i)
1377 removeIfDead(OldInsts[i]);
1378
1379 ++NumHWLoops;
1380
1381 MORE->emit([&]() {
1382 return MachineOptimizationRemark(DEBUG_TYPE, "HardwareLoop",
1383 L->getStartLoc(), L->getHeader())
1384 << "converted loop to hardware loop";
1385 });
1386
1387 // Set RecL1used and RecL0used only after hardware loop has been
1388 // successfully generated. Doing it earlier can cause wrong loop instruction
1389 // to be used.
1390 if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1391 RecL1used = true;
1392 else
1393 RecL0used = true;
1394
1395 return true;
1396}
1397
1398bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1399 MachineInstr *CmpI) {
1400 assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1401
1402 MachineBasicBlock *BB = BumpI->getParent();
1403 if (CmpI->getParent() != BB)
1404 return false;
1405
1406 using instr_iterator = MachineBasicBlock::instr_iterator;
1407
1408 // Check if things are in order to begin with.
1409 for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1410 if (&*I == CmpI)
1411 return true;
1412
1413 // Out of order.
1414 Register PredR = CmpI->getOperand(0).getReg();
1415 bool FoundBump = false;
1416 instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1417 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1418 MachineInstr *In = &*I;
1419 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1420 MachineOperand &MO = In->getOperand(i);
1421 if (MO.isReg() && MO.isUse()) {
1422 if (MO.getReg() == PredR) // Found an intervening use of PredR.
1423 return false;
1424 }
1425 }
1426
1427 if (In == BumpI) {
1428 BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1429 FoundBump = true;
1430 break;
1431 }
1432 }
1433 assert (FoundBump && "Cannot determine instruction order");
1434 return FoundBump;
1435}
1436
1437/// This function is required to break recursion. Visiting phis in a loop may
1438/// result in recursion during compilation. We break the recursion by making
1439/// sure that we visit a MachineOperand and its definition in a
1440/// MachineInstruction only once. If we attempt to visit more than once, then
1441/// there is recursion, and will return false.
1442bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1443 MachineInstr *MI,
1444 const MachineOperand *MO,
1445 LoopFeederMap &LoopFeederPhi) const {
1446 if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1447 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1448 << printMBBReference(**L->block_begin()));
1449 // Ignore all BBs that form Loop.
1450 if (llvm::is_contained(L->getBlocks(), A))
1451 return false;
1452 MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1453 LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1454 return true;
1455 } else
1456 // Already visited node.
1457 return false;
1458}
1459
1460/// Return true if a Phi may generate a value that can underflow.
1461/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1462bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1463 MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1464 MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1465 assert(Phi->isPHI() && "Expecting a Phi.");
1466 // Walk through each Phi, and its used operands. Make sure that
1467 // if there is recursion in Phi, we won't generate hardware loops.
1468 for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1469 if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1470 if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1471 Phi->getParent(), L, LoopFeederPhi))
1472 return true;
1473 return false;
1474}
1475
1476/// Return true if the induction variable can underflow in the first iteration.
1477/// An example, is an initial unsigned value that is 0 and is decrement in the
1478/// first itertion of a do-while loop. In this case, we cannot generate a
1479/// hardware loop because the endloop instruction does not decrement the loop
1480/// counter if it is <= 1. We only need to perform this analysis if the
1481/// initial value is a register.
1482///
1483/// This function assumes the initial value may underflow unless proven
1484/// otherwise. If the type is signed, then we don't care because signed
1485/// underflow is undefined. We attempt to prove the initial value is not
1486/// zero by performing a crude analysis of the loop counter. This function
1487/// checks if the initial value is used in any comparison prior to the loop
1488/// and, if so, assumes the comparison is a range check. This is inexact,
1489/// but will catch the simple cases.
1490bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1491 const MachineOperand *InitVal, const MachineOperand *EndVal,
1492 MachineBasicBlock *MBB, MachineLoop *L,
1493 LoopFeederMap &LoopFeederPhi) const {
1494 // Only check register values since they are unknown.
1495 if (!InitVal->isReg())
1496 return false;
1497
1498 if (!EndVal->isImm())
1499 return false;
1500
1501 // A register value that is assigned an immediate is a known value, and it
1502 // won't underflow in the first iteration.
1503 int64_t Imm;
1504 if (checkForImmediate(*InitVal, Imm))
1505 return (EndVal->getImm() == Imm);
1506
1507 Register Reg = InitVal->getReg();
1508
1509 // We don't know the value of a physical register.
1510 if (!Reg.isVirtual())
1511 return true;
1512
1513 MachineInstr *Def = MRI->getVRegDef(Reg);
1514 if (!Def)
1515 return true;
1516
1517 // If the initial value is a Phi or copy and the operands may not underflow,
1518 // then the definition cannot be underflow either.
1519 if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1520 L, LoopFeederPhi))
1521 return false;
1522 if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1523 EndVal, Def->getParent(),
1524 L, LoopFeederPhi))
1525 return false;
1526
1527 // Iterate over the uses of the initial value. If the initial value is used
1528 // in a compare, then we assume this is a range check that ensures the loop
1529 // doesn't underflow. This is not an exact test and should be improved.
1531 E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1532 MachineInstr *MI = &*I;
1533 Register CmpReg1, CmpReg2;
1534 int64_t CmpMask = 0, CmpValue = 0;
1535
1536 if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1537 continue;
1538
1539 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1541 if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1542 continue;
1543
1544 Comparison::Kind Cmp =
1545 getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1546 if (Cmp == 0)
1547 continue;
1548 if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1549 Cmp = Comparison::getNegatedComparison(Cmp);
1550 if (CmpReg2 != 0 && CmpReg2 == Reg)
1551 Cmp = Comparison::getSwappedComparison(Cmp);
1552
1553 // Signed underflow is undefined.
1554 if (Comparison::isSigned(Cmp))
1555 return false;
1556
1557 // Check if there is a comparison of the initial value. If the initial value
1558 // is greater than or not equal to another value, then assume this is a
1559 // range check.
1560 if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1561 return false;
1562 }
1563
1564 // OK - this is a hack that needs to be improved. We really need to analyze
1565 // the instructions performed on the initial value. This works on the simplest
1566 // cases only.
1567 if (!Def->isCopy() && !Def->isPHI())
1568 return false;
1569
1570 return true;
1571}
1572
1573bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1574 int64_t &Val) const {
1575 if (MO.isImm()) {
1576 Val = MO.getImm();
1577 return true;
1578 }
1579 if (!MO.isReg())
1580 return false;
1581
1582 // MO is a register. Check whether it is defined as an immediate value,
1583 // and if so, get the value of it in TV. That value will then need to be
1584 // processed to handle potential subregisters in MO.
1585 int64_t TV;
1586
1587 Register R = MO.getReg();
1588 if (!R.isVirtual())
1589 return false;
1590 MachineInstr *DI = MRI->getVRegDef(R);
1591 unsigned DOpc = DI->getOpcode();
1592 switch (DOpc) {
1593 case TargetOpcode::COPY:
1594 case Hexagon::A2_tfrsi:
1595 case Hexagon::A2_tfrpi:
1596 case Hexagon::CONST32:
1597 case Hexagon::CONST64:
1598 // Call recursively to avoid an extra check whether operand(1) is
1599 // indeed an immediate (it could be a global address, for example),
1600 // plus we can handle COPY at the same time.
1601 if (!checkForImmediate(DI->getOperand(1), TV))
1602 return false;
1603 break;
1604 case Hexagon::A2_combineii:
1605 case Hexagon::A4_combineir:
1606 case Hexagon::A4_combineii:
1607 case Hexagon::A4_combineri:
1608 case Hexagon::A2_combinew: {
1609 const MachineOperand &S1 = DI->getOperand(1);
1610 const MachineOperand &S2 = DI->getOperand(2);
1611 int64_t V1, V2;
1612 if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1613 return false;
1614 TV = V2 | (static_cast<uint64_t>(V1) << 32);
1615 break;
1616 }
1617 case TargetOpcode::REG_SEQUENCE: {
1618 const MachineOperand &S1 = DI->getOperand(1);
1619 const MachineOperand &S3 = DI->getOperand(3);
1620 int64_t V1, V3;
1621 if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1622 return false;
1623 unsigned Sub2 = DI->getOperand(2).getImm();
1624 unsigned Sub4 = DI->getOperand(4).getImm();
1625 if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1626 TV = V1 | (V3 << 32);
1627 else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1628 TV = V3 | (V1 << 32);
1629 else
1630 llvm_unreachable("Unexpected form of REG_SEQUENCE");
1631 break;
1632 }
1633
1634 default:
1635 return false;
1636 }
1637
1638 // By now, we should have successfully obtained the immediate value defining
1639 // the register referenced in MO. Handle a potential use of a subregister.
1640 switch (MO.getSubReg()) {
1641 case Hexagon::isub_lo:
1642 Val = TV & 0xFFFFFFFFULL;
1643 break;
1644 case Hexagon::isub_hi:
1645 Val = (TV >> 32) & 0xFFFFFFFFULL;
1646 break;
1647 default:
1648 Val = TV;
1649 break;
1650 }
1651 return true;
1652}
1653
1654void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1655 if (MO.isImm()) {
1656 MO.setImm(Val);
1657 return;
1658 }
1659
1660 assert(MO.isReg());
1661 Register R = MO.getReg();
1662 MachineInstr *DI = MRI->getVRegDef(R);
1663
1664 const TargetRegisterClass *RC = MRI->getRegClass(R);
1665 Register NewR = MRI->createVirtualRegister(RC);
1666 MachineBasicBlock &B = *DI->getParent();
1667 DebugLoc DL = DI->getDebugLoc();
1668 BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1669 MO.setReg(NewR);
1670}
1671
1672bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1673 MachineBasicBlock *Header = L->getHeader();
1674 MachineBasicBlock *Latch = L->getLoopLatch();
1675 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1676
1677 if (!(Header && Latch && ExitingBlock))
1678 return false;
1679
1680 // These data structures follow the same concept as the corresponding
1681 // ones in findInductionRegister (where some comments are).
1682 using RegisterBump = std::pair<Register, int64_t>;
1683 using RegisterInduction = std::pair<Register, RegisterBump>;
1684 using RegisterInductionSet = std::set<RegisterInduction>;
1685
1686 // Register candidates for induction variables, with their associated bumps.
1687 RegisterInductionSet IndRegs;
1688
1689 // Look for induction patterns:
1690 // %1 = PHI ..., [ latch, %2 ]
1691 // %2 = ADD %1, imm
1692 using instr_iterator = MachineBasicBlock::instr_iterator;
1693
1694 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1695 I != E && I->isPHI(); ++I) {
1696 MachineInstr *Phi = &*I;
1697
1698 // Have a PHI instruction.
1699 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1700 if (Phi->getOperand(i+1).getMBB() != Latch)
1701 continue;
1702
1703 Register PhiReg = Phi->getOperand(i).getReg();
1704 MachineInstr *DI = MRI->getVRegDef(PhiReg);
1705
1706 if (DI->getDesc().isAdd()) {
1707 // If the register operand to the add/sub is the PHI we are looking
1708 // at, this meets the induction pattern.
1709 Register IndReg = DI->getOperand(1).getReg();
1710 MachineOperand &Opnd2 = DI->getOperand(2);
1711 int64_t V;
1712 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1713 Register UpdReg = DI->getOperand(0).getReg();
1714 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1715 }
1716 }
1717 } // for (i)
1718 } // for (instr)
1719
1720 if (IndRegs.empty())
1721 return false;
1722
1723 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1725 // analyzeBranch returns true if it fails to analyze branch.
1726 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1727 if (NotAnalyzed || Cond.empty())
1728 return false;
1729
1730 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1731 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1733 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1734 if (NotAnalyzed)
1735 return false;
1736
1737 // Since latch is not the exiting block, the latch branch should be an
1738 // unconditional branch to the loop header.
1739 if (TB == Latch)
1740 TB = (LTB == Header) ? LTB : LFB;
1741 else
1742 FB = (LTB == Header) ? LTB : LFB;
1743 }
1744 if (TB != Header) {
1745 if (FB != Header) {
1746 // The latch/exit block does not go back to the header.
1747 return false;
1748 }
1749 // FB is the header (i.e., uncond. jump to branch header)
1750 // In this case, the LoopBody -> TB should not be a back edge otherwise
1751 // it could result in an infinite loop after conversion to hw_loop.
1752 // This case can happen when the Latch has two jumps like this:
1753 // Jmp_c OuterLoopHeader <-- TB
1754 // Jmp InnerLoopHeader <-- FB
1755 if (MDT->dominates(TB, FB))
1756 return false;
1757 }
1758
1759 // Expecting a predicate register as a condition. It won't be a hardware
1760 // predicate register at this point yet, just a vreg.
1761 // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1762 // into Cond, followed by the predicate register. For non-negated branches
1763 // it's just the register.
1764 unsigned CSz = Cond.size();
1765 if (CSz != 1 && CSz != 2)
1766 return false;
1767
1768 if (!Cond[CSz-1].isReg())
1769 return false;
1770
1771 Register P = Cond[CSz - 1].getReg();
1772 MachineInstr *PredDef = MRI->getVRegDef(P);
1773
1774 if (!PredDef->isCompare())
1775 return false;
1776
1777 SmallSet<Register,2> CmpRegs;
1778 MachineOperand *CmpImmOp = nullptr;
1779
1780 // Go over all operands to the compare and look for immediate and register
1781 // operands. Assume that if the compare has a single register use and a
1782 // single immediate operand, then the register is being compared with the
1783 // immediate value.
1784 for (MachineOperand &MO : PredDef->operands()) {
1785 if (MO.isReg()) {
1786 // Skip all implicit references. In one case there was:
1787 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1788 if (MO.isImplicit())
1789 continue;
1790 if (MO.isUse()) {
1791 if (!isImmediate(MO)) {
1792 CmpRegs.insert(MO.getReg());
1793 continue;
1794 }
1795 // Consider the register to be the "immediate" operand.
1796 if (CmpImmOp)
1797 return false;
1798 CmpImmOp = &MO;
1799 }
1800 } else if (MO.isImm()) {
1801 if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1802 return false;
1803 CmpImmOp = &MO;
1804 }
1805 }
1806
1807 if (CmpRegs.empty())
1808 return false;
1809
1810 // Check if the compared register follows the order we want. Fix if needed.
1811 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1812 I != E; ++I) {
1813 // This is a success. If the register used in the comparison is one that
1814 // we have identified as a bumped (updated) induction register, there is
1815 // nothing to do.
1816 if (CmpRegs.count(I->first))
1817 return true;
1818
1819 // Otherwise, if the register being compared comes out of a PHI node,
1820 // and has been recognized as following the induction pattern, and is
1821 // compared against an immediate, we can fix it.
1822 const RegisterBump &RB = I->second;
1823 if (CmpRegs.count(RB.first)) {
1824 if (!CmpImmOp) {
1825 // If both operands to the compare instruction are registers, see if
1826 // it can be changed to use induction register as one of the operands.
1827 MachineInstr *IndI = nullptr;
1828 MachineInstr *nonIndI = nullptr;
1829 MachineOperand *IndMO = nullptr;
1830 MachineOperand *nonIndMO = nullptr;
1831
1832 for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1833 MachineOperand &MO = PredDef->getOperand(i);
1834 if (MO.isReg() && MO.getReg() == RB.first) {
1835 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1836 << ") = " << *(MRI->getVRegDef(I->first)));
1837 if (IndI)
1838 return false;
1839
1840 IndI = MRI->getVRegDef(I->first);
1841 IndMO = &MO;
1842 } else if (MO.isReg()) {
1843 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1844 << ") = " << *(MRI->getVRegDef(MO.getReg())));
1845 if (nonIndI)
1846 return false;
1847
1848 nonIndI = MRI->getVRegDef(MO.getReg());
1849 nonIndMO = &MO;
1850 }
1851 }
1852 if (IndI && nonIndI &&
1853 nonIndI->getOpcode() == Hexagon::A2_addi &&
1854 nonIndI->getOperand(2).isImm() &&
1855 nonIndI->getOperand(2).getImm() == - RB.second) {
1856 bool Order = orderBumpCompare(IndI, PredDef);
1857 if (Order) {
1858 IndMO->setReg(I->first);
1859 nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1860 return true;
1861 }
1862 }
1863 return false;
1864 }
1865
1866 // It is not valid to do this transformation on an unsigned comparison
1867 // because it may underflow.
1868 Comparison::Kind Cmp =
1869 getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1870 if (!Cmp || Comparison::isUnsigned(Cmp))
1871 return false;
1872
1873 // If the register is being compared against an immediate, try changing
1874 // the compare instruction to use induction register and adjust the
1875 // immediate operand.
1876 int64_t CmpImm = getImmediate(*CmpImmOp);
1877 int64_t V = RB.second;
1878 // Handle Overflow (64-bit).
1879 if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1880 ((V < 0) && (CmpImm < INT64_MIN - V)))
1881 return false;
1882 CmpImm += V;
1883 // Most comparisons of register against an immediate value allow
1884 // the immediate to be constant-extended. There are some exceptions
1885 // though. Make sure the new combination will work.
1886 if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
1887 !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
1888 return false;
1889
1890 // Make sure that the compare happens after the bump. Otherwise,
1891 // after the fixup, the compare would use a yet-undefined register.
1892 MachineInstr *BumpI = MRI->getVRegDef(I->first);
1893 bool Order = orderBumpCompare(BumpI, PredDef);
1894 if (!Order)
1895 return false;
1896
1897 // Finally, fix the compare instruction.
1898 setImmediate(*CmpImmOp, CmpImm);
1899 for (MachineOperand &MO : PredDef->operands()) {
1900 if (MO.isReg() && MO.getReg() == RB.first) {
1901 MO.setReg(I->first);
1902 return true;
1903 }
1904 }
1905 }
1906 }
1907
1908 return false;
1909}
1910
1911/// createPreheaderForLoop - Create a preheader for a given loop.
1912MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1913 MachineLoop *L) {
1914 if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1915 return TmpPH;
1916 if (!HWCreatePreheader)
1917 return nullptr;
1918
1919 MachineBasicBlock *Header = L->getHeader();
1920 MachineBasicBlock *Latch = L->getLoopLatch();
1921 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1922 MachineFunction *MF = Header->getParent();
1923 DebugLoc DL;
1924
1925#ifndef NDEBUG
1926 if ((!PHFn.empty()) && (PHFn != MF->getName()))
1927 return nullptr;
1928#endif
1929
1930 if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1931 return nullptr;
1932
1933 using instr_iterator = MachineBasicBlock::instr_iterator;
1934
1935 // Verify that all existing predecessors have analyzable branches
1936 // (or no branches at all).
1937 using MBBVector = std::vector<MachineBasicBlock *>;
1938
1939 MBBVector Preds(Header->pred_begin(), Header->pred_end());
1941 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1942
1943 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1944 return nullptr;
1945
1946 for (MachineBasicBlock *PB : Preds) {
1947 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1948 if (NotAnalyzed)
1949 return nullptr;
1950 }
1951
1952 MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1953 MF->insert(Header->getIterator(), NewPH);
1954
1955 if (Header->pred_size() > 2) {
1956 // Ensure that the header has only two predecessors: the preheader and
1957 // the loop latch. Any additional predecessors of the header should
1958 // join at the newly created preheader. Inspect all PHI nodes from the
1959 // header and create appropriate corresponding PHI nodes in the preheader.
1960
1961 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1962 I != E && I->isPHI(); ++I) {
1963 MachineInstr *PN = &*I;
1964
1965 const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1966 MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1967 NewPH->insert(NewPH->end(), NewPN);
1968
1969 Register PR = PN->getOperand(0).getReg();
1970 const TargetRegisterClass *RC = MRI->getRegClass(PR);
1971 Register NewPR = MRI->createVirtualRegister(RC);
1972 NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1973
1974 // Copy all non-latch operands of a header's PHI node to the newly
1975 // created PHI node in the preheader.
1976 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1977 Register PredR = PN->getOperand(i).getReg();
1978 unsigned PredRSub = PN->getOperand(i).getSubReg();
1979 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1980 if (PredB == Latch)
1981 continue;
1982
1983 MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1984 MO.setSubReg(PredRSub);
1985 NewPN->addOperand(MO);
1987 }
1988
1989 // Remove copied operands from the old PHI node and add the value
1990 // coming from the preheader's PHI.
1991 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1992 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1993 if (PredB != Latch) {
1994 PN->removeOperand(i+1);
1995 PN->removeOperand(i);
1996 }
1997 }
1998 PN->addOperand(MachineOperand::CreateReg(NewPR, false));
2000 }
2001 } else {
2002 assert(Header->pred_size() == 2);
2003
2004 // The header has only two predecessors, but the non-latch predecessor
2005 // is not a preheader (e.g. it has other successors, etc.)
2006 // In such a case we don't need any extra PHI nodes in the new preheader,
2007 // all we need is to adjust existing PHIs in the header to now refer to
2008 // the new preheader.
2009 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
2010 I != E && I->isPHI(); ++I) {
2011 MachineInstr *PN = &*I;
2012 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
2013 MachineOperand &MO = PN->getOperand(i+1);
2014 if (MO.getMBB() != Latch)
2015 MO.setMBB(NewPH);
2016 }
2017 }
2018 }
2019
2020 // "Reroute" the CFG edges to link in the new preheader.
2021 // If any of the predecessors falls through to the header, insert a branch
2022 // to the new preheader in that place.
2025
2026 TB = FB = nullptr;
2027
2028 for (MachineBasicBlock *PB : Preds) {
2029 if (PB != Latch) {
2030 Tmp2.clear();
2031 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
2032 (void)NotAnalyzed; // suppress compiler warning
2033 assert (!NotAnalyzed && "Should be analyzable!");
2034 if (TB != Header && (Tmp2.empty() || FB != Header))
2035 TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
2036 PB->ReplaceUsesOfBlockWith(Header, NewPH);
2037 }
2038 }
2039
2040 // It can happen that the latch block will fall through into the header.
2041 // Insert an unconditional branch to the header.
2042 TB = FB = nullptr;
2043 bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
2044 (void)LatchNotAnalyzed; // suppress compiler warning
2045 assert (!LatchNotAnalyzed && "Should be analyzable!");
2046 if (!TB && !FB)
2047 TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
2048
2049 // Finally, the branch from the preheader to the header.
2050 TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
2051 NewPH->addSuccessor(Header);
2052
2053 MachineLoop *ParentLoop = L->getParentLoop();
2054 if (ParentLoop)
2055 ParentLoop->addBasicBlockToLoop(NewPH, *MLI);
2056
2057 // Update the dominator information with the new preheader.
2058 if (MDT) {
2059 if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
2060 if (MachineDomTreeNode *DHN = HN->getIDom()) {
2061 MDT->addNewBlock(NewPH, DHN->getBlock());
2062 MDT->changeImmediateDominator(Header, NewPH);
2063 }
2064 }
2065 }
2066
2067 return NewPH;
2068}
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool isSigned(unsigned Opcode)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::Hidden, cl::desc("Allow speculation of preheader " "instructions"))
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
SmallVector< MachineBasicBlock *, 4 > MBBVector
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getLoopTripCount(const Loop *L, ScalarEvolution &SE)
Get the assumed loop trip count for the loop L.
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
AnalysisUsage & addRequired()
DomTreeNodeBase * getIDom() const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool doesNotReturn(const MachineInstr &CallMI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
bool getPredReg(ArrayRef< MachineOperand > Cond, Register &PredReg, unsigned &PredRegPos, RegState &PredRegFlags) const
bool isExtendable(const MachineInstr &MI) const
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isAdd() const
Return true if the instruction is an add instruction.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
Instructions::iterator instr_iterator
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
mop_range operands()
LLVM_ABI void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
defusechain_instr_iterator< true, false, true, true > use_instr_nodbg_iterator
use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk all uses of the specified r...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
static use_nodbg_iterator use_nodbg_end()
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
defusechain_iterator< true, false, true, true, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
iterator_range< use_iterator > use_operands(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
void dump() const
Definition Pass.cpp:146
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool empty() const
Definition SmallSet.h:169
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
void push_back(const T &Elt)
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
RegState
Flags to represent properties of register accesses.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionPass * createHexagonHardwareLoops()
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
@ Sub
Subtraction of integers.
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define MORE()
Definition regcomp.c:246