31#include "llvm/IR/IntrinsicsHexagon.h"
50#define DEBUG_TYPE "hexagon-vlcr"
53 "Number of values that were reused from a previous iteration.");
57 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
66 ChainOfDependences Chain;
69 bool isIdentical(DepChain &
Other)
const {
72 ChainOfDependences &OtherChain =
Other.getChain();
73 for (
int i = 0; i <
size(); ++i) {
74 if (Chain[i] != OtherChain[i])
80 ChainOfDependences &getChain() {
92 void push_back(Instruction *
I) {
96 int iterations()
const {
101 return Chain.front();
112 friend raw_ostream &
operator<< (raw_ostream &OS,
const DepChain &
D);
117 const ChainOfDependences &CD =
D.Chain;
118 int ChainSize = CD.size();
119 OS <<
"**DepChain Start::**\n";
120 for (
int i = 0; i < ChainSize -1; ++i) {
121 OS << *(CD[i]) <<
" -->\n";
123 OS << *CD[ChainSize-1] <<
"\n";
134 std::map<Instruction *, DepChain *> DepChains;
137 ReuseValue() =
default;
140 Inst2Replace =
nullptr;
141 BackedgeInst =
nullptr;
145 bool isDefined() {
return Inst2Replace !=
nullptr; }
150 OS <<
"** ReuseValue ***\n";
151 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
152 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
156 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
160 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {}
162 StringRef getPassName()
const override {
163 return "Hexagon-specific loop carried reuse for HVX vectors";
166 void getAnalysisUsage(AnalysisUsage &AU)
const override {
169 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
174 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
177 class HexagonVectorLoopCarriedReuse {
179 HexagonVectorLoopCarriedReuse(Loop *L, OptimizationRemarkEmitter &ORE)
180 : CurLoop(
L), ORE(ORE) {};
185 SetVector<DepChain *> Dependences;
186 std::set<Instruction *> ReplacedInsts;
188 OptimizationRemarkEmitter &ORE;
189 ReuseValue ReuseCandidate;
192 void findLoopCarriedDeps();
193 void findValueToReuse();
194 void findDepChainFromPHI(Instruction *
I, DepChain &
D);
197 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2,
int Iters);
198 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
199 bool canReplace(Instruction *
I);
200 bool isCallInstCommutative(CallInst *
C);
205char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
208 "Hexagon-specific predictive commoning for HVX vectors",
214 "Hexagon-specific predictive commoning for HVX vectors",
222 HexagonVectorLoopCarriedReuse Vlcr(&L, ORE);
230bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
234 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
235 HexagonVectorLoopCarriedReuse Vlcr(L, ORE);
239bool HexagonVectorLoopCarriedReuse::run() {
245 <<
"loop has no preheader";
253 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotInnermostLoop",
256 <<
"pass only works on innermost loops";
264 return OptimizationRemarkMissed(
DEBUG_TYPE,
"MultipleBlocks",
267 <<
"loop has multiple basic blocks";
275bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *
C) {
276 switch (
C->getCalledFunction()->getIntrinsicID()) {
277 case Intrinsic::hexagon_V6_vaddb:
278 case Intrinsic::hexagon_V6_vaddb_128B:
279 case Intrinsic::hexagon_V6_vaddh:
280 case Intrinsic::hexagon_V6_vaddh_128B:
281 case Intrinsic::hexagon_V6_vaddw:
282 case Intrinsic::hexagon_V6_vaddw_128B:
283 case Intrinsic::hexagon_V6_vaddubh:
284 case Intrinsic::hexagon_V6_vaddubh_128B:
285 case Intrinsic::hexagon_V6_vadduhw:
286 case Intrinsic::hexagon_V6_vadduhw_128B:
287 case Intrinsic::hexagon_V6_vaddhw:
288 case Intrinsic::hexagon_V6_vaddhw_128B:
289 case Intrinsic::hexagon_V6_vmaxb:
290 case Intrinsic::hexagon_V6_vmaxb_128B:
291 case Intrinsic::hexagon_V6_vmaxh:
292 case Intrinsic::hexagon_V6_vmaxh_128B:
293 case Intrinsic::hexagon_V6_vmaxw:
294 case Intrinsic::hexagon_V6_vmaxw_128B:
295 case Intrinsic::hexagon_V6_vmaxub:
296 case Intrinsic::hexagon_V6_vmaxub_128B:
297 case Intrinsic::hexagon_V6_vmaxuh:
298 case Intrinsic::hexagon_V6_vmaxuh_128B:
299 case Intrinsic::hexagon_V6_vminub:
300 case Intrinsic::hexagon_V6_vminub_128B:
301 case Intrinsic::hexagon_V6_vminuh:
302 case Intrinsic::hexagon_V6_vminuh_128B:
303 case Intrinsic::hexagon_V6_vminb:
304 case Intrinsic::hexagon_V6_vminb_128B:
305 case Intrinsic::hexagon_V6_vminh:
306 case Intrinsic::hexagon_V6_vminh_128B:
307 case Intrinsic::hexagon_V6_vminw:
308 case Intrinsic::hexagon_V6_vminw_128B:
309 case Intrinsic::hexagon_V6_vmpyub:
310 case Intrinsic::hexagon_V6_vmpyub_128B:
311 case Intrinsic::hexagon_V6_vmpyuh:
312 case Intrinsic::hexagon_V6_vmpyuh_128B:
313 case Intrinsic::hexagon_V6_vavgub:
314 case Intrinsic::hexagon_V6_vavgub_128B:
315 case Intrinsic::hexagon_V6_vavgh:
316 case Intrinsic::hexagon_V6_vavgh_128B:
317 case Intrinsic::hexagon_V6_vavguh:
318 case Intrinsic::hexagon_V6_vavguh_128B:
319 case Intrinsic::hexagon_V6_vavgw:
320 case Intrinsic::hexagon_V6_vavgw_128B:
321 case Intrinsic::hexagon_V6_vavgb:
322 case Intrinsic::hexagon_V6_vavgb_128B:
323 case Intrinsic::hexagon_V6_vavguw:
324 case Intrinsic::hexagon_V6_vavguw_128B:
325 case Intrinsic::hexagon_V6_vabsdiffh:
326 case Intrinsic::hexagon_V6_vabsdiffh_128B:
327 case Intrinsic::hexagon_V6_vabsdiffub:
328 case Intrinsic::hexagon_V6_vabsdiffub_128B:
329 case Intrinsic::hexagon_V6_vabsdiffuh:
330 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
331 case Intrinsic::hexagon_V6_vabsdiffw:
332 case Intrinsic::hexagon_V6_vabsdiffw_128B:
339bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
341 if (!
I1->isSameOperationAs(I2))
349 if (C1->getCalledFunction() != C2->getCalledFunction())
357 unsigned NumOperands =
I1->getNumOperands();
358 for (
unsigned i = 0; i < NumOperands; ++i) {
371bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *
I) {
376 switch (
II->getIntrinsicID()) {
377 case Intrinsic::hexagon_V6_hi:
378 case Intrinsic::hexagon_V6_lo:
379 case Intrinsic::hexagon_V6_hi_128B:
380 case Intrinsic::hexagon_V6_lo_128B:
387void HexagonVectorLoopCarriedReuse::findValueToReuse() {
388 for (
auto *
D : Dependences) {
389 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
393 <<
".. Skipping because number of iterations > than the limit\n");
399 int Iters =
D->iterations();
402 <<
" can be reused\n");
404 SmallVector<Instruction *, 4> PNUsers;
405 for (Use &U : PN->
uses()) {
408 if (
User->getParent() != BB)
410 if (ReplacedInsts.count(User)) {
412 <<
" has already been replaced. Skipping...\n");
417 if (
User->mayHaveSideEffects())
419 if (!canReplace(User))
432 for (Instruction *
I : PNUsers) {
433 for (Use &U : BEInst->
uses()) {
438 if (!isEquivalentOperation(
I, BEUser))
441 int NumOperands =
I->getNumOperands();
452 std::map<Instruction *, DepChain *> DepChains;
454 if ((
I &&
I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
456 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
460 for (
int T = 0;
T < NumOperands; ++
T) {
463 if (!OpInst && !BEOpInst) {
470 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
473 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
477 DepChains[OpInst] =
D;
488 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
503 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
506 DepChains[OpInst] =
D;
515 ReuseCandidate.Inst2Replace =
I;
516 ReuseCandidate.BackedgeInst = BEUser;
517 ReuseCandidate.DepChains = std::move(DepChains);
518 ReuseCandidate.Iterations = Iters;
521 ReuseCandidate.reset();
525 ReuseCandidate.reset();
528Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
536void HexagonVectorLoopCarriedReuse::reuseValue() {
538 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
541 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
542 int Iterations = ReuseCandidate.Iterations;
544 assert(!DepChains.empty() &&
"No DepChains");
545 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
547 SmallVector<Instruction *, 4> InstsInPreheader;
548 for (
int i = 0; i < Iterations; ++i) {
550 for (
int j = 0;
j < NumOperands; ++
j) {
555 DepChain &
D = *DepChains[
I];
559 Value *ValInPreheader = findValueInBlock(
D[i], LoopPH);
560 InstInPreheader->
setOperand(j, ValInPreheader);
562 InstsInPreheader.
push_back(InstInPreheader);
563 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
571 Value *BEVal = BEInst;
573 for (
int i = Iterations-1; i >=0 ; --i) {
574 Instruction *InstInPreheader = InstsInPreheader[i];
575 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
585 ReplacedInsts.insert(Inst2Replace);
586 ++HexagonNumVectorLoopCarriedReuse;
588 return OptimizationRemark(
DEBUG_TYPE,
"VectorReuse",
591 <<
"reused loop-carried vector value";
595bool HexagonVectorLoopCarriedReuse::doVLCR() {
597 "Can do VLCR on the innermost loop only");
599 "Can do VLCR only on single block loops");
610 findLoopCarriedDeps();
612 if (ReuseCandidate.isDefined()) {
618 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NoCandidateFound",
621 <<
"no loop-carried reuse candidate found";
629void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *
I,
637 if (NumIncomingValues != 2) {
652 assert(BEInst &&
"There should be a value over the backedge");
661 findDepChainFromPHI(BEInst,
D);
665DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
668 for (
auto *
D : Dependences) {
669 if (
D->front() == I1 &&
D->back() == I2 &&
D->iterations() == Iters)
675void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
682 DepChain *
D =
new DepChain();
683 findDepChainFromPHI(PN, *
D);
685 Dependences.insert(
D);
689 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
694 return new HexagonVectorLoopCarriedReuseLegacyPass();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
iterator begin()
Instruction iterator methods.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Represents analyses that only rely on functions' control flow.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isVectorTy() const
True if this is an instance of VectorType.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI Instruction & back() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI char & LoopSimplifyID
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Run pass over the Loop.
HexagonVectorLoopCarriedReusePass()=default
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...