42#include "llvm/IR/IntrinsicsHexagon.h"
77#define DEBUG_TYPE "hexagon-lir"
83 cl::desc(
"Disable generation of memcpy in loop idiom recognition"));
87 cl::desc(
"Disable generation of memmove in loop idiom recognition"));
91 "check guarding the memmove."));
95 cl::desc(
"Threshold (in bytes) to perform the transformation, if the "
96 "runtime loop count (mem transfer size) is known at compile-time."));
100 cl::desc(
"Only enable generating memmove in non-nested loops"));
104 cl::desc(
"Enable Hexagon-specific memcpy for volatile destination."));
111class HexagonLoopIdiomRecognize {
117 :
AA(
AA), DT(DT), LF(LF), TLI(TLI), SE(SE), ORE(ORE) {}
122 int getSCEVStride(
const SCEVAddRecExpr *StoreEv);
123 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
124 void collectStores(Loop *CurLoop, BasicBlock *BB,
125 SmallVectorImpl<StoreInst *> &Stores);
126 bool processCopyingStore(Loop *CurLoop, StoreInst *SI,
const SCEV *BECount);
127 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts)
const;
128 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
const SCEV *BECount,
129 SmallVectorImpl<BasicBlock *> &ExitBlocks);
130 bool runOnCountableLoop(Loop *L);
133 const DataLayout *DL;
136 const TargetLibraryInfo *TLI;
138 OptimizationRemarkEmitter &ORE;
139 bool HasMemcpy, HasMemmove;
142class HexagonLoopIdiomRecognizeLegacyPass :
public LoopPass {
146 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {}
148 StringRef getPassName()
const override {
149 return "Recognize Hexagon-specific loop idioms";
152 void getAnalysisUsage(AnalysisUsage &AU)
const override {
160 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
164 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
170 Rule(StringRef
N, FuncType
F) : Name(
N), Fn(
F) {}
175 void addRule(StringRef
N,
const Rule::FuncType &
F) {
176 Rules.push_back(Rule(
N,
F));
180 struct WorkListType {
181 WorkListType() =
default;
183 void push_back(
Value *V) {
185 if (S.insert(V).second)
189 Value *pop_front_val() {
196 bool empty()
const {
return Q.empty(); }
199 std::deque<Value *> Q;
203 using ValueSetType = std::set<Value *>;
205 std::vector<Rule> Rules;
209 using ValueMapType = DenseMap<Value *, Value *>;
223 void print(raw_ostream &OS,
const Value *V)
const;
227 friend struct Simplifier;
232 template <
typename FuncT>
void traverse(
Value *V, FuncT
F);
233 void record(
Value *V);
235 void unuse(
Value *V);
237 bool equal(
const Instruction *
I,
const Instruction *J)
const;
248 PE(
const Simplifier::Context &c,
Value *v =
nullptr) : C(c), V(
v) {}
250 const Simplifier::Context &C;
256 P.C.print(OS,
P.V ?
P.V :
P.C.Root);
262char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
265 "Recognize Hexagon-specific loop idioms",
false,
false)
277template <typename FuncT>
278void Simplifier::Context::traverse(
Value *V, FuncT
F) {
283 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
284 if (!U || U->getParent())
288 for (Value *Op : U->operands())
296 OS <<
V <<
'(' << *
V <<
')';
300 if (
U->getParent()) {
302 U->printAsOperand(OS,
true);
307 unsigned N =
U->getNumOperands();
310 OS <<
U->getOpcodeName();
311 for (
const Value *
Op :
U->operands()) {
319void Simplifier::Context::initialize(Instruction *Exp) {
329 Value *
V = Q.pop_front_val();
337 M.insert({
U,
U->clone()});
341 for (std::pair<Value*,Value*>
P : M) {
343 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
344 auto F =
M.find(
U->getOperand(i));
346 U->setOperand(i,
F->second);
350 auto R =
M.find(Exp);
358void Simplifier::Context::record(
Value *V) {
366void Simplifier::Context::use(
Value *V) {
374void Simplifier::Context::unuse(
Value *V) {
398 if (!U ||
U->getParent())
400 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
403 U->setOperand(i, NewV);
413void Simplifier::Context::replace(
Value *OldV,
Value *NewV) {
429 Value *
V = Q.pop_front_val();
431 if (!U ||
U->getParent())
435 NewV = subst(NewV, V, DupV);
443 Root = subst(Root, OldV, NewV);
447void Simplifier::Context::cleanup() {
448 for (
Value *V : Clones) {
451 U->dropAllReferences();
454 for (
Value *V : Clones) {
461bool Simplifier::Context::equal(
const Instruction *
I,
462 const Instruction *J)
const {
465 if (!
I->isSameOperationAs(J))
468 return I->isIdenticalTo(J);
470 for (
unsigned i = 0, n =
I->getNumOperands(); i != n; ++i) {
477 if (!
equal(InI, InJ))
479 }
else if (InI != InJ || !InI)
491 Value *
V = Q.pop_front_val();
495 if (!U ||
U->getParent())
497 if (SubI &&
equal(SubI, U))
506void Simplifier::Context::link(Instruction *
I, BasicBlock *
B,
516 I->insertInto(
B, At);
519Value *Simplifier::Context::materialize(BasicBlock *
B,
533 if (
Count++ >= Limit)
536 if (!U ||
U->getParent() || !
C.Used.count(U))
539 for (Rule &R : Rules) {
554 return Count < Limit ?
C.Root :
nullptr;
565 class PolynomialMultiplyRecognize {
567 explicit PolynomialMultiplyRecognize(Loop *loop,
const DataLayout &dl,
568 const DominatorTree &dt,
const TargetLibraryInfo &tli,
570 : CurLoop(loop),
DL(dl), DT(dt), TLI(tli), SE(se) {}
575 using ValueSeq = SetVector<Value *>;
577 IntegerType *getPmpyType()
const {
578 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
582 bool isPromotableTo(
Value *V, IntegerType *Ty);
583 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
584 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
586 Value *getCountIV(BasicBlock *BB);
588 void classifyCycle(Instruction *DivI, ValueSeq &
Cycle, ValueSeq &Early,
590 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
591 bool commutesWithShift(Instruction *
I);
592 bool highBitsAreZero(
Value *V,
unsigned IterCount);
593 bool keepsHighBitsZero(
Value *V,
unsigned IterCount);
594 bool isOperandShifted(Instruction *
I,
Value *
Op);
595 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
597 void cleanupLoopBody(BasicBlock *LoopB);
599 struct ParsedValues {
600 ParsedValues() =
default;
608 unsigned IterCount = 0;
613 bool matchLeftShift(SelectInst *SelI,
Value *CIV, ParsedValues &PV);
614 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
615 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
616 Value *CIV, ParsedValues &PV,
bool PreScan);
617 unsigned getInverseMxN(
unsigned QP);
620 void setupPreSimplifier(Simplifier &S);
621 void setupPostSimplifier(Simplifier &S);
624 const DataLayout &
DL;
625 const DominatorTree &DT;
626 const TargetLibraryInfo &TLI;
632Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
634 if (std::distance(PI, PE) != 2)
640 Value *InitV = PN->getIncomingValueForBlock(
PB);
643 Value *IterV = PN->getIncomingValueForBlock(BB);
647 if (BO->getOpcode() != Instruction::Add)
649 Value *IncV =
nullptr;
650 if (BO->getOperand(0) == PN)
651 IncV = BO->getOperand(1);
652 else if (BO->getOperand(1) == PN)
653 IncV = BO->getOperand(0);
665 for (
auto UI =
I->user_begin(), UE =
I->user_end(); UI != UE;) {
666 Use &TheUse = UI.getUse();
669 if (BB ==
II->getParent())
670 II->replaceUsesOfWith(
I, J);
674bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
675 Value *CIV, ParsedValues &PV) {
687 using namespace PatternMatch;
690 Value *
A =
nullptr, *
B =
nullptr, *
C =
nullptr;
700 Value *
X =
nullptr, *Sh1 =
nullptr;
728 Value *ShouldSameV =
nullptr, *ShouldXoredV =
nullptr;
731 ShouldXoredV = FalseV;
733 ShouldSameV = FalseV;
734 ShouldXoredV = TrueV;
737 Value *Q =
nullptr, *
R =
nullptr, *
Y =
nullptr, *
Z =
nullptr;
743 if (ShouldSameV ==
Y)
745 else if (ShouldSameV == Z)
788bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
801 using namespace PatternMatch;
828 Value *
R =
nullptr, *Q =
nullptr;
858bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
859 BasicBlock *LoopB, BasicBlock *PrehB,
Value *CIV, ParsedValues &PV,
861 using namespace PatternMatch;
900 if (matchLeftShift(SelI, CIV, PV)) {
909 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
915 if (CurLoop->isLoopInvariant(PV.X)) {
925 Value *Var =
nullptr, *Inv =
nullptr, *X1 =
nullptr, *X2 =
nullptr;
930 if (!I1 ||
I1->getParent() != LoopB) {
933 }
else if (!I2 || I2->getParent() != LoopB) {
944 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
951 if (matchRightShift(SelI, PV)) {
965bool PolynomialMultiplyRecognize::isPromotableTo(
Value *Val,
966 IntegerType *DestTy) {
983 switch (
In->getOpcode()) {
984 case Instruction::PHI:
985 case Instruction::ZExt:
986 case Instruction::And:
987 case Instruction::Or:
988 case Instruction::Xor:
989 case Instruction::LShr:
990 case Instruction::Select:
991 case Instruction::Trunc:
993 case Instruction::ICmp:
995 return CI->isEquality() || CI->isUnsigned();
997 case Instruction::Add:
998 return In->hasNoSignedWrap() &&
In->hasNoUnsignedWrap();
1003void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1004 IntegerType *DestTy, BasicBlock *LoopB) {
1005 Type *OrigTy =
In->getType();
1009 if (!
In->getType()->isIntegerTy(1))
1010 In->mutateType(DestTy);
1015 unsigned N =
P->getNumIncomingValues();
1016 for (
unsigned i = 0; i !=
N; ++i) {
1020 Value *InV =
P->getIncomingValue(i);
1023 if (Ty !=
P->getType()) {
1028 P->setIncomingValue(i, InV);
1033 if (
Op->getType() ==
Z->getType())
1034 Z->replaceAllUsesWith(
Op);
1035 Z->eraseFromParent();
1042 T->replaceAllUsesWith(
And);
1043 T->eraseFromParent();
1048 for (
unsigned i = 0, n =
In->getNumOperands(); i != n; ++i) {
1050 if (CI->getBitWidth() < DestBW)
1051 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1055bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1056 BasicBlock *ExitB) {
1064 IntegerType *DestTy = getPmpyType();
1068 for (PHINode &
P : ExitB->
phis()) {
1069 if (
P.getNumIncomingValues() != 1)
1071 assert(
P.getIncomingBlock(0) == LoopB);
1073 if (!
T ||
T->getBitWidth() > DestBW)
1078 for (Instruction &In : *LoopB)
1079 if (!
In.isTerminator() && !isPromotableTo(&In, DestTy))
1084 for (Instruction *In : LoopIns)
1085 if (!
In->isTerminator())
1086 promoteTo(In, DestTy, LoopB);
1090 for (
auto I = ExitB->
begin();
I != End; ++
I) {
1094 Type *Ty0 =
P->getIncomingValue(0)->getType();
1095 Type *PTy =
P->getType();
1103 P->replaceAllUsesWith(
T);
1113bool PolynomialMultiplyRecognize::findCycle(
Value *Out,
Value *In,
1120 bool HadPhi =
false;
1122 for (
auto *U : Out->
users()) {
1124 if (
I ==
nullptr ||
I->getParent() != BB)
1132 if (IsPhi && HadPhi)
1137 if (findCycle(
I, In,
Cycle))
1141 return !
Cycle.empty();
1144void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1145 ValueSeq &
Cycle, ValueSeq &Early, ValueSeq &Late) {
1152 for (
I = 0;
I <
N; ++
I) {
1163 ValueSeq &
First = !IsE ? Early : Late;
1164 for (
unsigned J = 0; J <
I; ++J)
1167 ValueSeq &Second = IsE ? Early : Late;
1169 for (++
I;
I <
N; ++
I) {
1180bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1181 ValueSeq &Early, ValueSeq &Late) {
1185 if (UseI->
getOpcode() == Instruction::Select) {
1187 if (Early.count(TV) || Early.count(FV)) {
1188 if (Late.count(TV) || Late.count(FV))
1191 }
else if (Late.count(TV) || Late.count(FV)) {
1192 if (Early.count(TV) || Early.count(FV))
1204 bool AE =
true,
AL =
true;
1206 if (Early.count(&*
I))
1208 else if (Late.count(&*
I))
1232bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *
I) {
1233 switch (
I->getOpcode()) {
1234 case Instruction::And:
1235 case Instruction::Or:
1236 case Instruction::Xor:
1237 case Instruction::LShr:
1238 case Instruction::Shl:
1239 case Instruction::Select:
1240 case Instruction::ICmp:
1241 case Instruction::PHI:
1249bool PolynomialMultiplyRecognize::highBitsAreZero(
Value *V,
1250 unsigned IterCount) {
1255 KnownBits Known(
T->getBitWidth());
1257 return Known.countMinLeadingZeros() >= IterCount;
1260bool PolynomialMultiplyRecognize::keepsHighBitsZero(
Value *V,
1261 unsigned IterCount) {
1265 return C->getValue().countl_zero() >= IterCount;
1268 switch (
I->getOpcode()) {
1269 case Instruction::And:
1270 case Instruction::Or:
1271 case Instruction::Xor:
1272 case Instruction::LShr:
1273 case Instruction::Select:
1274 case Instruction::ICmp:
1275 case Instruction::PHI:
1276 case Instruction::ZExt:
1284bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *
I,
Value *
Op) {
1285 unsigned Opc =
I->getOpcode();
1286 if (
Opc == Instruction::Shl ||
Opc == Instruction::LShr)
1287 return Op !=
I->getOperand(1);
1291bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1292 BasicBlock *ExitB,
unsigned IterCount) {
1293 Value *CIV = getCountIV(LoopB);
1297 if (CIVTy ==
nullptr)
1301 ValueSeq Early, Late, Cycled;
1304 for (Instruction &
I : *LoopB) {
1305 using namespace PatternMatch;
1311 if (!findCycle(&
I, V,
C))
1316 classifyCycle(&
I,
C, Early, Late);
1317 Cycled.insert_range(
C);
1324 for (
unsigned i = 0; i <
Users.size(); ++i) {
1331 if (!commutesWithShift(R))
1333 for (User *U :
R->users()) {
1342 if (!classifyInst(
T, Early, Late))
1353 for (
unsigned i = 0; i <
Internal.size(); ++i) {
1359 if (
T &&
T->getParent() != LoopB)
1365 for (
Value *V : Inputs)
1366 if (!highBitsAreZero(V, IterCount))
1369 if (!keepsHighBitsZero(V, IterCount))
1374 std::map<Value*,Value*> ShiftMap;
1376 using CastMapType = std::map<std::pair<Value *, Type *>,
Value *>;
1378 CastMapType CastMap;
1381 IntegerType *Ty) ->
Value * {
1382 auto [
H,
Inserted] = CM.try_emplace(std::make_pair(V, Ty));
1384 H->second = IRB.CreateIntCast(V, Ty,
false);
1388 for (
auto I = LoopB->begin(),
E = LoopB->end();
I !=
E; ++
I) {
1389 using namespace PatternMatch;
1402 for (
auto &J :
I->operands()) {
1404 if (!isOperandShifted(&*
I,
Op))
1412 auto F = ShiftMap.find(
Op);
1413 Value *
W = (
F != ShiftMap.end()) ?
F->second :
nullptr;
1415 IRB.SetInsertPoint(&*
I);
1419 Value *ShAmt = CIV, *ShVal =
Op;
1422 if (Late.count(&*
I))
1423 ShVal = IRB.CreateShl(
Op, ConstantInt::get(VTy, 1));
1427 if (VTy->getBitWidth() < ATy->getBitWidth())
1428 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1430 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1433 W = IRB.CreateShl(ShVal, ShAmt);
1434 ShiftMap.insert(std::make_pair(
Op, W));
1436 I->replaceUsesOfWith(
Op, W);
1446 for (
auto P = ExitB->
begin(), Q = ExitB->
end();
P != Q; ++
P) {
1450 Value *
U = PN->getIncomingValueForBlock(LoopB);
1451 if (!
Users.count(U))
1453 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1454 PN->replaceAllUsesWith(S);
1465void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1466 for (
auto &
I : *LoopB)
1468 I.replaceAllUsesWith(SV);
1474unsigned PolynomialMultiplyRecognize::getInverseMxN(
unsigned QP) {
1477 std::array<char,32> Q,
C;
1479 for (
unsigned i = 0; i < 32; ++i) {
1496 for (
unsigned i = 1; i < 32; ++i) {
1504 for (
unsigned j = 0;
j < i; ++
j)
1505 T =
T ^ (
C[j] & Q[i-j]);
1510 for (
unsigned i = 0; i < 32; ++i)
1520 Module *
M = At->getParent()->getParent()->getParent();
1525 unsigned IC = PV.IterCount;
1527 if (PV.M !=
nullptr)
1528 P0 =
P =
B.CreateXor(
P, PV.M);
1533 if (PV.IterCount != 32)
1534 P =
B.CreateAnd(
P, BMI);
1538 assert(QI && QI->getBitWidth() <= 32);
1541 unsigned M = (1 << PV.IterCount) - 1;
1542 unsigned Tmp = (QI->getZExtValue() | 1) &
M;
1543 unsigned QV = getInverseMxN(Tmp) &
M;
1544 auto *QVI = ConstantInt::get(QI->getType(), QV);
1545 P =
B.CreateCall(PMF, {
P, QVI});
1546 P =
B.CreateTrunc(
P, QI->getType());
1548 P =
B.CreateAnd(
P, BMI);
1551 Value *
R =
B.CreateCall(PMF, {
P, Q});
1553 if (PV.M !=
nullptr)
1554 R =
B.CreateXor(R,
B.CreateIntCast(P0,
R->getType(),
false));
1561 return CI->getValue().isNonNegative();
1565 switch (
I->getOpcode()) {
1566 case Instruction::LShr:
1568 return SI->getZExtValue() > 0;
1570 case Instruction::Or:
1571 case Instruction::Xor:
1574 case Instruction::And:
1581void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1582 S.addRule(
"sink-zext",
1584 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1585 if (
I->getOpcode() != Instruction::ZExt)
1590 switch (
T->getOpcode()) {
1591 case Instruction::And:
1592 case Instruction::Or:
1593 case Instruction::Xor:
1600 B.CreateZExt(
T->getOperand(0),
I->getType()),
1601 B.CreateZExt(
T->getOperand(1),
I->getType()));
1603 S.addRule(
"xor/and -> and/xor",
1605 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1606 if (
I->getOpcode() != Instruction::Xor)
1612 if (And0->
getOpcode() != Instruction::And ||
1621 S.addRule(
"sink binop into select",
1624 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1631 Value *
X = Sel->getTrueValue(), *
Y = Sel->getFalseValue();
1633 return B.CreateSelect(Sel->getCondition(),
1634 B.CreateBinOp(
Op,
X, Z),
1635 B.CreateBinOp(
Op,
Y, Z));
1640 Value *
Y = Sel->getTrueValue(), *
Z = Sel->getFalseValue();
1641 return B.CreateSelect(Sel->getCondition(),
1642 B.CreateBinOp(
Op,
X,
Y),
1643 B.CreateBinOp(
Op,
X, Z));
1647 S.addRule(
"fold select-select",
1650 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1657 if (Sel0->getCondition() ==
C)
1661 if (Sel1->getCondition() ==
C)
1662 return B.CreateSelect(
C, Sel->
getTrueValue(), Sel1->getFalseValue());
1666 S.addRule(
"or-signbit -> xor-signbit",
1668 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1669 if (
I->getOpcode() != Instruction::Or)
1676 return IRBuilder<>(Ctx).CreateXor(
I->getOperand(0), Msb);
1678 S.addRule(
"sink lshr into binop",
1680 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1681 if (
I->getOpcode() != Instruction::LShr)
1687 case Instruction::And:
1688 case Instruction::Or:
1689 case Instruction::Xor:
1695 Value *S =
I->getOperand(1);
1700 S.addRule(
"expose bitop-const",
1702 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1703 auto IsBitOp = [](
unsigned Op) ->
bool {
1705 case Instruction::And:
1706 case Instruction::Or:
1707 case Instruction::Xor:
1713 if (!BitOp1 || !IsBitOp(BitOp1->
getOpcode()))
1716 if (!BitOp2 || !IsBitOp(BitOp2->
getOpcode()))
1727 S.addRule(
"select with trunc cond to select with icmp cond",
1730 [](Instruction *
I, LLVMContext &Ctx) ->
Value * {
1736 using namespace PatternMatch;
1742 Type *Ty =
X->getType();
1743 Value *
And =
B.CreateAnd(
X, ConstantInt::get(Ty, 1));
1745 : ICmpInst::ICMP_EQ,
1746 And, ConstantInt::get(Ty, 0));
1752void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1753 S.addRule(
"(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1754 [](Instruction *
I, LLVMContext &Ctx) ->
Value* {
1755 if (
I->getOpcode() != Instruction::And)
1761 if (
Xor->getOpcode() != Instruction::Xor)
1766 if (!And0 || And0->
getOpcode() != Instruction::And)
1773 if (V0 != (V0 & V1))
1776 return B.CreateAnd(
B.CreateXor(And0->
getOperand(0), And1), C0);
1780bool PolynomialMultiplyRecognize::recognize() {
1781 LLVM_DEBUG(
dbgs() <<
"Starting PolynomialMultiplyRecognize on loop\n"
1782 << *CurLoop <<
'\n');
1791 if (LoopB != CurLoop->getLoopLatch())
1794 if (ExitB ==
nullptr)
1796 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1797 if (EntryB ==
nullptr)
1800 unsigned IterCount = 0;
1801 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1805 IterCount = CV->getValue()->getZExtValue() + 1;
1807 Value *CIV = getCountIV(LoopB);
1812 PV.IterCount = IterCount;
1813 LLVM_DEBUG(
dbgs() <<
"Loop IV: " << *CIV <<
"\nIterCount: " << IterCount
1816 setupPreSimplifier(PreSimp);
1824 bool FoundPreScan =
false;
1825 auto FeedsPHI = [LoopB](
const Value *
V) ->
bool {
1826 for (
const Value *U :
V->users()) {
1828 if (
P->getParent() == LoopB)
1833 for (Instruction &In : *LoopB) {
1835 if (!SI || !FeedsPHI(SI))
1838 Simplifier::Context
C(SI);
1839 Value *
T = PreSimp.simplify(
C);
1841 LLVM_DEBUG(
dbgs() <<
"scanSelect(pre-scan): " << PE(
C, SelI) <<
'\n');
1842 if (scanSelect(SelI, LoopB, EntryB, CIV, PV,
true)) {
1843 FoundPreScan =
true;
1845 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1846 SI->replaceAllUsesWith(NewSel);
1853 if (!FoundPreScan) {
1863 if (!promoteTypes(LoopB, ExitB))
1866 Simplifier PostSimp;
1867 setupPostSimplifier(PostSimp);
1868 for (Instruction &In : *LoopB) {
1870 if (!SI || !FeedsPHI(SI))
1872 Simplifier::Context
C(SI);
1873 Value *
T = PostSimp.simplify(
C);
1876 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1877 SI->replaceAllUsesWith(NewSel);
1883 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1885 cleanupLoopBody(LoopB);
1889 bool FoundScan =
false;
1890 for (Instruction &In : *LoopB) {
1895 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV,
false);
1902 StringRef PP = (PV.M ?
"(P+M)" :
"P");
1904 dbgs() <<
"Found pmpy idiom: R = " << PP <<
".Q\n";
1906 dbgs() <<
"Found inverse pmpy idiom: R = (" << PP <<
"/Q).Q) + "
1908 dbgs() <<
" Res:" << *PV.Res <<
"\n P:" << *PV.P <<
"\n";
1910 dbgs() <<
" M:" << *PV.M <<
"\n";
1911 dbgs() <<
" Q:" << *PV.Q <<
"\n";
1912 dbgs() <<
" Iteration count:" << PV.IterCount <<
"\n";
1916 Value *PM = generate(At, PV);
1920 if (PM->
getType() != PV.Res->getType())
1921 PM =
IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(),
false);
1923 PV.Res->replaceAllUsesWith(PM);
1924 PV.Res->eraseFromParent();
1928int HexagonLoopIdiomRecognize::getSCEVStride(
const SCEVAddRecExpr *S) {
1930 return SC->getAPInt().getSExtValue();
1934bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1939 Value *StoredVal =
SI->getValueOperand();
1940 Value *StorePtr =
SI->getPointerOperand();
1943 uint64_t SizeInBits =
DL->getTypeSizeInBits(StoredVal->
getType());
1944 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1951 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) {
1953 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NonAffineStorePtr",
1954 SI->getDebugLoc(),
SI->getParent())
1955 <<
"store pointer is not an affine AddRec";
1962 int Stride = getSCEVStride(StoreEv);
1965 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1966 if (StoreSize !=
unsigned(std::abs(Stride))) {
1968 return OptimizationRemarkMissed(
DEBUG_TYPE,
"StrideSizeMismatch",
1969 SI->getDebugLoc(),
SI->getParent())
1970 <<
"stride does not match store size";
1979 return OptimizationRemarkMissed(
DEBUG_TYPE,
"StoreNotFeedingLoad",
1980 SI->getDebugLoc(),
SI->getParent())
1981 <<
"store value is not a simple load";
1991 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) {
1993 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NonAffineLoadPtr",
1995 <<
"load pointer is not an affine AddRec";
2001 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
2013 const SCEV *BECount,
unsigned StoreSize,
2033 for (
auto *
B : L->blocks())
2035 if (Ignored.
count(&
I) == 0 &&
2042void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2043 SmallVectorImpl<StoreInst*> &Stores) {
2045 for (Instruction &
I : *BB)
2047 if (isLegalStore(CurLoop, SI))
2051bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2052 StoreInst *SI,
const SCEV *BECount) {
2054 "Expected only non-volatile stores, or Hexagon-specific memcpy"
2055 "to volatile destination.");
2057 Value *StorePtr =
SI->getPointerOperand();
2059 unsigned Stride = getSCEVStride(StoreEv);
2060 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
2061 if (Stride != StoreSize)
2076 SCEVExpander Expander(*SE,
"hexagon-loop-idiom");
2078 Type *IntPtrTy = Builder.getIntPtrTy(*
DL,
SI->getPointerAddressSpace());
2086 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2087 Builder.getPtrTy(
SI->getPointerAddressSpace()), ExpPt);
2088 Value *LoadBasePtr =
nullptr;
2090 bool Overlap =
false;
2091 bool DestVolatile =
SI->isVolatile();
2097 if (StoreSize != 4 ||
DL->getTypeSizeInBits(BECountTy) > 32) {
2101 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2103 StoreBasePtr =
nullptr;
2107 LoadBasePtr =
nullptr;
2113 SmallPtrSet<Instruction*, 2> Ignore1;
2116 StoreSize, *AA, Ignore1)) {
2120 BECount, StoreSize, *AA, Ignore1)) {
2123 return OptimizationRemarkMissed(
DEBUG_TYPE,
"MemoryAlias",
2124 SI->getDebugLoc(),
SI->getParent())
2125 <<
"memory aliasing prevents memcpy/memmove";
2127 goto CleanupAndExit;
2136 return OptimizationRemarkMissed(
DEBUG_TYPE,
"MemcpyDisabled",
2137 SI->getDebugLoc(),
SI->getParent())
2138 <<
"memcpy idiom is disabled or unavailable";
2140 goto CleanupAndExit;
2146 if (
Func->hasFnAttribute(Attribute::AlwaysInline))
2147 goto CleanupAndExit;
2153 SmallVector<Instruction*,2> Insts;
2156 if (!coverLoop(CurLoop, Insts)) {
2158 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExtraLoopInstructions",
2159 SI->getDebugLoc(),
SI->getParent())
2160 <<
"loop contains instructions beyond load/store pair";
2162 goto CleanupAndExit;
2167 return OptimizationRemarkMissed(
DEBUG_TYPE,
"MemmoveDisabled",
2168 SI->getDebugLoc(),
SI->getParent())
2169 <<
"memmove idiom is disabled or unavailable";
2171 goto CleanupAndExit;
2176 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NestedLoop",
2177 SI->getDebugLoc(),
SI->getParent())
2178 <<
"memmove skipped for nested loop";
2180 goto CleanupAndExit;
2186 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2189 SmallPtrSet<Instruction*, 2> Ignore2;
2192 StoreSize, *AA, Ignore2))
2193 goto CleanupAndExit;
2196 bool StridePos = getSCEVStride(LoadEv) >= 0;
2199 if (!StridePos && DestVolatile)
2200 goto CleanupAndExit;
2202 bool RuntimeCheck = (Overlap || DestVolatile);
2207 SmallVector<BasicBlock*, 8> ExitBlocks;
2209 if (ExitBlocks.
size() != 1)
2210 goto CleanupAndExit;
2211 ExitB = ExitBlocks[0];
2216 LLVMContext &Ctx =
SI->getContext();
2217 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2220 const SCEV *NumBytesS =
2221 SE->getAddExpr(BECount, SE->getOne(IntPtrTy),
SCEV::FlagNUW);
2223 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2225 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2235 uint64_t
C = CI->getZExtValue();
2236 if (Threshold != 0 &&
C < Threshold)
2237 goto CleanupAndExit;
2239 goto CleanupAndExit;
2244 Loop *ParentL = LF->getLoopFor(Preheader);
2245 StringRef HeaderName = Header->getName();
2254 for (
auto &In : *Header) {
2262 DT->addNewBlock(NewPreheader, Preheader);
2263 DT->changeImmediateDominator(Header, NewPreheader);
2271 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2272 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2273 Value *LowA = StridePos ? SA : LA;
2274 Value *HighA = StridePos ? LA : SA;
2275 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2280 Value *Dist = Builder.CreateSub(LowA, HighA);
2281 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2282 Value *CmpEither = Builder.CreateOr(
Cond, CmpD);
2285 if (Threshold != 0) {
2287 Value *Thr = ConstantInt::get(Ty, Threshold);
2288 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2289 Value *CmpBoth = Builder.CreateAnd(
Cond, CmpB);
2293 Func, NewPreheader);
2297 Builder.CreateCondBr(
Cond, MemmoveB, NewPreheader);
2300 DT->addNewBlock(MemmoveB, Preheader);
2304 ExitD = DT->findNearestCommonDominator(ExitD,
PB);
2312 if (ExitD && DT->dominates(Preheader, ExitD)) {
2320 CondBuilder.CreateBr(ExitB);
2325 Type *PtrTy = PointerType::get(Ctx, 0);
2326 Type *VoidTy = Type::getVoidTy(Ctx);
2330 StringRef HexagonVolatileMemcpyName =
2332 RTLIB::impl_hexagon_memcpy_forward_vp4cp4n2);
2333 FunctionCallee Fn =
M->getOrInsertFunction(
2334 HexagonVolatileMemcpyName, VoidTy, PtrTy, PtrTy,
Int32Ty);
2336 const SCEV *OneS = SE->getConstant(
Int32Ty, 1);
2337 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount,
Int32Ty);
2338 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS,
SCEV::FlagNUW);
2339 Value *NumWords = Expander.expandCodeFor(NumWordsS,
Int32Ty,
2345 NewCall = CondBuilder.CreateCall(Fn,
2346 {StoreBasePtr, LoadBasePtr, NumWords});
2348 NewCall = CondBuilder.CreateMemMove(
2349 StoreBasePtr,
SI->getAlign(), LoadBasePtr, LI->
getAlign(), NumBytes);
2352 NewCall = Builder.CreateMemCpy(StoreBasePtr,
SI->getAlign(), LoadBasePtr,
2361 LLVM_DEBUG(
dbgs() <<
" Formed " << (Overlap ?
"memmove: " :
"memcpy: ")
2363 <<
" from load ptr=" << *LoadEv <<
" at: " << *LI <<
"\n"
2364 <<
" from store ptr=" << *StoreEv <<
" at: " << *SI
2369 return OptimizationRemark(
DEBUG_TYPE,
"LoopToMemmove", DLoc,
2371 <<
"converted loop to memmove";
2375 return OptimizationRemark(
DEBUG_TYPE,
"LoopToMemcpy", DLoc,
2377 <<
"converted loop to memcpy";
2387bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2388 SmallVectorImpl<Instruction*> &Insts)
const {
2389 SmallPtrSet<BasicBlock *, 8> LoopBlocks;
2398 for (
unsigned i = 0; i < Worklist.size(); ++i) {
2400 for (
auto I =
In->op_begin(),
E =
In->op_end();
I !=
E; ++
I) {
2407 Worklist.insert(OpI);
2415 for (
auto *
B :
L->blocks()) {
2416 for (
auto &In : *
B) {
2419 if (!Worklist.count(&In) &&
In.mayHaveSideEffects())
2421 for (
auto *K :
In.users()) {
2426 if (LF->getLoopFor(UseB) != L)
2438bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2439 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2443 auto DominatedByBB = [
this,BB] (
BasicBlock *EB) ->
bool {
2444 return DT->dominates(BB, EB);
2446 if (!
all_of(ExitBlocks, DominatedByBB))
2449 bool MadeChange =
false;
2451 SmallVector<StoreInst*,8> Stores;
2452 collectStores(CurLoop, BB, Stores);
2455 for (
auto &SI : Stores)
2456 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2461bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2462 PolynomialMultiplyRecognize PMR(L, *
DL, *DT, *TLI, *SE);
2463 if (PMR.recognize()) {
2465 return OptimizationRemark(
DEBUG_TYPE,
"PolynomialMultiply",
2466 L->getStartLoc(),
L->getHeader())
2467 <<
"recognized polynomial multiply idiom";
2472 if (!HasMemcpy && !HasMemmove)
2475 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2477 "runOnCountableLoop() called on a loop without a predictable"
2478 "backedge-taken count");
2480 SmallVector<BasicBlock *, 8> ExitBlocks;
2481 L->getUniqueExitBlocks(ExitBlocks);
2486 for (
auto *BB :
L->getBlocks()) {
2488 if (LF->getLoopFor(BB) != L)
2490 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2496bool HexagonLoopIdiomRecognize::run(Loop *L) {
2497 const Module &
M = *
L->getHeader()->getParent()->getParent();
2503 if (!
L->getLoopPreheader()) {
2505 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NoPreheader",
2506 L->getStartLoc(),
L->getHeader())
2507 <<
"loop not in canonical form (no preheader)";
2513 StringRef
Name =
L->getHeader()->getParent()->getName();
2514 if (Name ==
"memset" || Name ==
"memcpy" || Name ==
"memmove")
2517 DL = &
L->getHeader()->getDataLayout();
2519 HasMemcpy = TLI->has(LibFunc_memcpy);
2520 HasMemmove = TLI->has(LibFunc_memmove);
2522 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2523 return runOnCountableLoop(L);
2526 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NonCountableLoop",
2527 L->getStartLoc(),
L->getHeader())
2528 <<
"backedge-taken count is not loop-invariant";
2533bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2534 LPPassManager &LPM) {
2538 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2539 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2540 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2541 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2542 *
L->getHeader()->getParent());
2543 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2544 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2545 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE, ORE).run(L);
2549 return new HexagonLoopIdiomRecognizeLegacyPass();
2557 return HexagonLoopIdiomRecognize(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
TLI, &AR.
SE, ORE)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ATTRIBUTE_USED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static cl::opt< unsigned > SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"))
static cl::opt< bool > DisableMemcpyIdiom("disable-memcpy-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memcpy in loop idiom recognition"))
static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB)
static cl::opt< unsigned > RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " "check guarding the memmove."))
static cl::opt< bool > HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination."))
static cl::opt< bool > DisableMemmoveIdiom("disable-memmove-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memmove in loop idiom recognition"))
static cl::opt< unsigned > CompileTimeMemSizeThreshold("compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), cl::desc("Threshold (in bytes) to perform the transformation, if the " "runtime loop count (mem transfer size) is known at compile-time."))
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location, which is a loop-strided access.
static bool hasZeroSignBit(const Value *V)
static cl::opt< bool > OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Move duplicate certain instructions close to their use
This header provides classes for managing per-loop analyses.
Machine Check Debug Module
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
BinaryOps getOpcode() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
void setIDom(DomTreeNodeBase *NewIDom)
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
Representation for a specific memory location.
void setIncomingBlock(unsigned i, BasicBlock *BB)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents a constant integer value.
SCEVUse getOperand(unsigned i) const
This class represents an analyzed expression in the program.
static constexpr auto FlagNUW
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const Value * getFalseValue() const
const Value * getCondition() const
const Value * getTrueValue() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
Provides information about what library functions are available for the current target.
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
LLVM_ABI void link(std::unique_ptr< LinkGraph > G, std::unique_ptr< JITLinkContext > Ctx)
Link the given graph.
NodeAddr< UseNode * > Use
NodeAddr< FuncNode * > Func
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto pred_end(const MachineBasicBlock *BB)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
constexpr from_range_t from_range
Pass * createHexagonLoopIdiomPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI char & LoopSimplifyID
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
bool isModOrRefSet(const ModRefInfo MRI)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
PredIterator< BasicBlock, Value::user_iterator > pred_iterator
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.