22#include "llvm/IR/IntrinsicsAMDGPU.h"
27#define DEBUG_TYPE "amdgpu-simplifylib"
33 cl::desc(
"Enable pre-link mode optimizations"),
38 cl::desc(
"Comma separated list of functions to replace with native, or all"),
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
58 bool AllNative =
false;
79 bool parseFunctionName(
const StringRef &FMangledName, FuncInfo &FInfo);
81 bool TDOFold(
CallInst *CI,
const FuncInfo &FInfo);
92 const FuncInfo &FInfo);
98 bool sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo);
101 bool evaluateScalarMathFunc(
const FuncInfo &FInfo,
APFloat &Res0,
103 bool evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo);
107 std::tuple<Value *, Value *, Value *> insertSinCos(
Value *Arg,
117 const FuncInfo &FInfo);
124 bool shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
125 bool AllowMinSizeF32 =
false,
126 bool AllowF64 =
false,
127 bool AllowStrictFP =
false);
133 bool AllowMinSizeF32 =
false,
134 bool AllowF64 =
false,
135 bool AllowStrictFP =
false);
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
164template <
typename IRB>
166 const Twine &Name =
"") {
167 CallInst *R =
B.CreateCall(Callee, Arg, Name);
169 R->setCallingConv(
F->getCallingConv());
173template <
typename IRB>
176 CallInst *R =
B.CreateCall(Callee, {Arg1, Arg2}, Name);
178 R->setCallingConv(
F->getCallingConv());
188 {FT->getParamType(0), PowNExpTy},
false);
426 : AMDGPULibFunc::getFunction(
M, fInfo);
430 Module *M,
const FuncInfo &fInfo, FuncInfo &newInfo,
432 assert(NewFunc != FastVariant);
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
445bool AMDGPULibCalls::parseFunctionName(
const StringRef &FMangledName,
466bool AMDGPULibCalls::useNativeFunc(
const StringRef F)
const {
471 AllNative = useNativeFunc(
"all") ||
476bool AMDGPULibCalls::sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc(
"sin");
478 bool native_cos = useNativeFunc(
"cos");
480 if (native_sin && native_cos) {
495 if (sinExpr && cosExpr) {
503 <<
" with native version of sin/cos");
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.
isMangled() ||
521 !(AllNative || useNativeFunc(FInfo.
getName()))) {
526 return sincosUseNative(aCI, FInfo);
535 <<
" with native version");
547 const FuncInfo &FInfo) {
549 if (!Callee->isDeclaration())
552 assert(Callee->hasName() &&
"Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
556 if (NumArg != 4 && NumArg != 6)
562 if (!PacketSize || !PacketAlign)
567 if (Alignment !=
Size)
570 unsigned PtrArgLoc = CI->
arg_size() - 3;
575 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
579 Name = Name +
"_" + std::to_string(
Size);
588 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
590 Args.push_back(PtrArg);
592 auto *NCI =
B.CreateCall(
F, Args);
605 if (!Callee || Callee->isIntrinsic() || CI->
isNoBuiltin())
609 if (!parseFunctionName(Callee->getName(), FInfo))
619 if (TDOFold(CI, FInfo))
624 B.setIsFPConstrained(
true);
635 B.setFastMathFlags(FMF);
640 switch (FInfo.
getId()) {
644 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp,
649 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp2,
654 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log,
659 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log2,
664 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log10,
667 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::minnum,
670 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::maxnum,
673 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fma,
true,
676 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fmuladd,
679 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fabs,
true,
682 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::copysign,
685 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::floor,
true,
688 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::ceil,
true,
691 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::trunc,
true,
694 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::rint,
true,
697 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::round,
true,
700 if (!shouldReplaceLibcallWithIntrinsic(CI,
true,
true))
706 Value *SplatArg1 =
B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
717 return tryOptimizePow(FPOp,
B, FInfo);
720 if (fold_pow(FPOp,
B, FInfo))
727 Module *M = Callee->getParent();
735 if (!shouldReplaceLibcallWithIntrinsic(CI))
741 if (fold_pow(FPOp,
B, FInfo))
748 Module *M = Callee->getParent();
756 if (!shouldReplaceLibcallWithIntrinsic(CI))
762 if (fold_rootn(FPOp,
B, FInfo))
768 Module *M = Callee->getParent();
770 if (
FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
780 return tryReplaceLibcallWithSimpleIntrinsic(
781 B, CI, Intrinsic::sqrt,
true,
true,
false);
784 return fold_sincos(FPOp,
B, FInfo);
790 switch (FInfo.
getId()) {
795 return fold_read_write_pipe(CI,
B, FInfo);
814 ConstValues.
push_back(ConstantFP::get(ElemTy, APF));
819bool AMDGPULibCalls::TDOFold(
CallInst *CI,
const FuncInfo &FInfo) {
825 int const sz = (int)tr.
size();
835 for (
int eltNo = 0; eltNo < vecSize; ++eltNo) {
838 auto MatchingRow =
llvm::find_if(tr, [eltval](
const TableEntry &entry) {
841 if (MatchingRow == tr.
end())
846 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *NewValues <<
"\n");
853 for (
int i = 0; i < sz; ++i) {
854 if (CF->isExactlyValue(tr[i].input)) {
855 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
856 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
869#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
878 const FuncInfo &FInfo) {
885 "fold_pow: encounter a wrong function call");
887 Module *
M =
B.GetInsertBlock()->getModule();
893 const APInt *CINT =
nullptr;
898 int ci_opr1 = (CINT ? (int)CINT->
getSExtValue() : 0x1111111);
900 if ((CF && CF->
isZero()) || (CINT && ci_opr1 == 0)) {
903 Constant *cnval = ConstantFP::get(eltType, 1.0);
912 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
"\n");
918 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
" * "
920 Value *nval =
B.CreateFMul(opr0, opr0,
"__pow2");
924 if ((CF && CF->
isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
926 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1 / " << *opr0 <<
"\n");
927 Constant *cnval = ConstantFP::get(eltType, 1.0);
931 Value *nval =
B.CreateFDiv(cnval, opr0,
"__powrecip");
939 if (FunctionCallee FPExpr =
943 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << FInfo.getName()
944 <<
'(' << *opr0 <<
")\n");
962 int ival = (int)dval;
963 if ((
double)ival == dval) {
966 ci_opr1 = 0x11111111;
971 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
972 if (abs_opr1 <= 12) {
976 cnval = ConstantFP::get(eltType, 1.0);
982 Value *valx2 =
nullptr;
984 while (abs_opr1 > 0) {
985 valx2 = valx2 ?
B.CreateFMul(valx2, valx2,
"__powx2") : opr0;
987 nval = nval ?
B.CreateFMul(nval, valx2,
"__powprod") : valx2;
994 cnval = ConstantFP::get(eltType, 1.0);
998 nval =
B.CreateFDiv(cnval, nval,
"__1powprod");
1001 << ((ci_opr1 < 0) ?
"1/prod(" :
"prod(") << *opr0
1012 FunctionCallee ExpExpr;
1013 if (ShouldUseIntrinsic)
1022 bool needlog =
false;
1023 bool needabs =
false;
1024 bool needcopysign =
false;
1035 V =
log2(std::abs(V));
1036 cnval = ConstantFP::get(eltType, V);
1054 "Wrong vector size detected");
1059 if (V < 0.0) needcopysign =
true;
1060 V =
log2(std::abs(V));
1065 for (
double D : DVal)
1087 nval =
B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0,
nullptr,
"__fabs");
1089 nval = cnval ? cnval : opr0;
1092 FunctionCallee LogExpr;
1093 if (ShouldUseIntrinsic) {
1108 opr1 =
B.CreateSIToFP(opr1, nval->
getType(),
"pownI2F");
1110 nval =
B.CreateFMul(opr1, nval,
"__ylogx");
1112 CallInst *Exp2Call =
CreateCallEx(
B, ExpExpr, nval,
"__exp2");
1128 opr_n =
B.CreateZExtOrTrunc(opr_n, nTy,
"__ytou");
1130 opr_n =
B.CreateFPToSI(opr1, nTy,
"__ytou");
1133 Value *sign =
B.CreateShl(opr_n,
size-1,
"__yeven");
1134 sign =
B.CreateAnd(
B.CreateBitCast(opr0, nTy), sign,
"__pow_sign");
1136 nval =
B.CreateCopySign(nval,
B.CreateBitCast(sign, nval->
getType()),
1137 nullptr,
"__pow_sign");
1141 <<
"exp2(" << *opr1 <<
" * log2(" << *opr0 <<
"))\n");
1148 const FuncInfo &FInfo) {
1152 const APInt *CINT =
nullptr;
1156 Function *Parent =
B.GetInsertBlock()->getParent();
1159 if (ci_opr1 == 1 && !Parent->
hasFnAttribute(Attribute::StrictFP)) {
1163 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
'\n');
1168 Module *
M =
B.GetInsertBlock()->getModule();
1172 shouldReplaceLibcallWithIntrinsic(CI,
1176 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> sqrt(" << *opr0 <<
")\n");
1178 CallInst *NewCall =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1183 MDBuilder MDHelper(
M->getContext());
1184 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1185 NewCall->
setMetadata(LLVMContext::MD_fpmath, FPMD);
1192 if (FunctionCallee FPExpr =
1194 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> cbrt(" << *opr0
1200 }
else if (ci_opr1 == -1) {
1201 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1.0 / " << *opr0 <<
"\n");
1202 Value *nval =
B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0),
1209 if (ci_opr1 == -2 &&
1210 shouldReplaceLibcallWithIntrinsic(CI,
1217 MDBuilder MDHelper(
M->getContext());
1218 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1224 CallInst *Sqrt =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1226 B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0), Sqrt));
1231 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> rsqrt(" << *opr0
1242 Value *TruncY =
B.CreateUnaryIntrinsic(Intrinsic::trunc,
Y);
1243 return B.CreateFCmpOEQ(TruncY,
Y);
1248 auto *HalfY =
B.CreateFMul(
Y, ConstantFP::get(
Y->getType(), 0.5));
1256 Value *NotEvenY =
B.CreateNot(IsEvenY);
1257 return B.CreateAnd(IsIntY, NotEvenY);
1262 auto *fabsVal =
B.CreateUnaryIntrinsic(Intrinsic::fabs, val);
1268 Value *AbsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1269 Value *LogAbsX =
B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1270 Value *YTimesLogX =
B.CreateFMul(
Y, LogAbsX);
1271 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1281 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1291 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1292 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1296 Value *condNegX =
B.CreateFCmpOLT(
X, Zero);
1297 Value *condNotIntY =
B.CreateNot(IsIntY);
1298 Value *condNaN =
B.CreateAnd(condNegX, condNotIntY);
1299 Ret =
B.CreateSelect(condNaN, QNaN, Ret);
1307 Value *AY =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
Y);
1308 Value *YIsNegInf =
B.CreateFCmpUNE(
Y, AY);
1310 Value *AX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1311 Value *AxEqOne =
B.CreateFCmpOEQ(AX, One);
1312 Value *AxLtOne =
B.CreateFCmpOLT(AX, One);
1313 Value *XorCond =
B.CreateXor(AxLtOne, YIsNegInf);
1315 B.CreateSelect(AxEqOne, AX,
B.CreateSelect(XorCond, Zero, AY));
1316 Ret =
B.CreateSelect(YIsInf, SelInf, Ret);
1320 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1321 Value *AxInfOrZero =
B.CreateOr(XIsInf, XEqZero);
1322 Value *YLtZero =
B.CreateFCmpOLT(
Y, Zero);
1323 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1324 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1325 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1326 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1327 Ret =
B.CreateSelect(AxInfOrZero, Copysign, Ret);
1330 Value *isUnordered =
B.CreateFCmpUNO(
X,
Y);
1331 return B.CreateSelect(isUnordered, QNaN, Ret);
1334 Value *YIsNeg =
B.CreateFCmpOLT(
Y, Zero);
1335 Value *IZ =
B.CreateSelect(YIsNeg, PInf, Zero);
1336 Value *ZI =
B.CreateSelect(YIsNeg, Zero, PInf);
1338 Value *YEqZero =
B.CreateFCmpOEQ(
Y, Zero);
1339 Value *SelZeroCase =
B.CreateSelect(YEqZero, QNaN, IZ);
1340 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1341 Value *Ret =
B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1343 Value *XEqInf =
B.CreateFCmpOEQ(
X, PInf);
1344 Value *YNeZero =
B.CreateFCmpUNE(
Y, Zero);
1345 Value *CondInfCase =
B.CreateAnd(XEqInf, YNeZero);
1346 Ret =
B.CreateSelect(CondInfCase, ZI, Ret);
1349 Value *XNeOne =
B.CreateFCmpUNE(
X, One);
1350 Value *CondInfY =
B.CreateAnd(IsInfY, XNeOne);
1351 Value *XLtOne =
B.CreateFCmpOLT(
X, One);
1352 Value *SelInfYCase =
B.CreateSelect(XLtOne, IZ, ZI);
1353 Ret =
B.CreateSelect(CondInfY, SelInfYCase, Ret);
1355 Value *IsUnordered =
B.CreateFCmpUNO(
X,
Y);
1356 return B.CreateSelect(IsUnordered, QNaN, Ret);
1359 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1362 Value *OneI = ConstantInt::get(
Y->getType(), 1);
1363 Value *YAnd1 =
B.CreateAnd(
Y, OneI);
1364 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1367 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1368 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1371 Value *FabsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1372 Value *XIsInf =
B.CreateFCmpOEQ(FabsX, PInf);
1373 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1374 Value *InfOrZero =
B.CreateOr(XIsInf, XEqZero);
1377 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1378 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1379 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1382 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1383 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1385 return B.CreateSelect(InfOrZero, Copysign, Ret);
1388 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1391 Value *YAnd1 =
B.CreateAnd(
Y, ConstantInt::get(
Y->getType(), 1));
1392 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1395 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1396 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1399 Value *FabsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1400 Value *IsInfX =
B.CreateFCmpOEQ(FabsX, PInf);
1401 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1402 Value *CondInfOrZero =
B.CreateOr(IsInfX, XEqZero);
1405 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1406 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1407 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1410 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1411 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1413 Ret =
B.CreateSelect(CondInfOrZero, Copysign, Ret);
1416 Value *XIsNeg =
B.CreateFCmpOLT(
X, Zero);
1417 Value *NotOddY =
B.CreateNot(IsOddY);
1418 Value *CondNegAndNotOdd =
B.CreateAnd(XIsNeg, NotOddY);
1419 Value *YEqZero =
B.CreateICmpEQ(
Y, ZeroI);
1420 Value *CondBad =
B.CreateOr(CondNegAndNotOdd, YEqZero);
1421 return B.CreateSelect(CondBad, QNaN, Ret);
1446 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1449 Value *XEqOne =
B.CreateFCmpOEQ(
X, One);
1450 Y =
B.CreateSelect(XEqOne, One,
Y);
1454 X =
B.CreateSelect(YEqZero, One,
X);
1472 Value *YEqZero =
B.CreateICmpEQ(
Y, ConstantInt::get(
Y->getType(), 0));
1475 X =
B.CreateSelect(YEqZero, ConstantFP::get(
X->getType(), 1.0),
X);
1477 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1484 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1487 Value *RcpY =
B.CreateFDiv(ConstantFP::get(
X->getType(), 1.0), CastY);
1499 const FuncInfo &FInfo) {
1509 FunctionCallee PowrFunc = getFloatFastVariant(
1517 SQ.getWithInstruction(
Call))) {
1519 return fold_pow(FPOp,
B, PowrInfo) ||
true;
1532 FunctionCallee PownFunc = getFloatFastVariant(
1540 B.CreateFPToSI(FPOp->
getOperand(1), PownType->getParamType(1));
1543 1, AttributeFuncs::typeIncompatible(CastedArg->
getType(),
1547 return fold_pow(FPOp,
B, PownInfo) ||
true;
1551 if (fold_pow(FPOp,
B, FInfo))
1560 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1562 return fold_pow(FPOp,
B, PowFastInfo) ||
true;
1571 const FuncInfo &FInfo) {
1574 FuncInfo nf = FInfo;
1576 return getFunction(M, nf);
1582bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
1583 bool AllowMinSizeF32,
1585 bool AllowStrictFP) {
1600 if (!AllowStrictFP && ParentF->
hasFnAttribute(Attribute::StrictFP))
1603 if (IsF32 && !AllowMinSizeF32 && ParentF->
hasMinSize())
1608void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(
IRBuilder<> &
B,
1616 if (Arg0VecTy && !Arg1VecTy) {
1617 Value *SplatRHS =
B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1619 }
else if (!Arg0VecTy && Arg1VecTy) {
1620 Value *SplatLHS =
B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1626 CI->
getModule(), IntrID, {CI->getType()}));
1629bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1631 bool AllowF64,
bool AllowStrictFP) {
1632 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1635 replaceLibCallWithSimpleIntrinsic(
B, CI, IntrID);
1639std::tuple<Value *, Value *, Value *>
1643 Function *
F =
B.GetInsertBlock()->getParent();
1644 B.SetInsertPointPastAllocas(
F);
1646 AllocaInst *
Alloc =
B.CreateAlloca(Arg->
getType(),
nullptr,
"__sincos_");
1653 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1656 B.SetCurrentDebugLocation(
DL);
1664 Value *CastAlloc =
B.CreateAddrSpaceCast(
Alloc, CosPtrTy);
1672 return {SinCos, LoadCos, SinCos};
1677 const FuncInfo &fInfo) {
1696 Function *
F =
B.GetInsertBlock()->getParent();
1702 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1706 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1709 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1710 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1711 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1720 const std::string PairName = PartnerInfo.mangle();
1724 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1725 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1729 MDNode *FPMath = CI->
getMetadata(LLVMContext::MD_fpmath);
1733 for (User* U : CArgVal->
users()) {
1742 bool Handled =
true;
1744 if (UCallee->
getName() == SinName)
1746 else if (UCallee->
getName() == CosName)
1748 else if (UCallee->
getName() == SinCosPrivateName ||
1749 UCallee->
getName() == SinCosGenericName)
1757 FMF &= OtherOp->getFastMathFlags();
1766 B.setFastMathFlags(FMF);
1767 B.setDefaultFPMathTag(FPMath);
1769 B.SetCurrentDebugLocation(DbgLoc);
1771 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF,
B, FSinCos);
1774 for (CallInst *
C : Calls)
1775 C->replaceAllUsesWith(Res);
1780 replaceTrigInsts(SinCalls, Sin);
1781 replaceTrigInsts(CosCalls, Cos);
1782 replaceTrigInsts(SinCosCalls, SinCos);
1789bool AMDGPULibCalls::evaluateScalarMathFunc(
const FuncInfo &FInfo,
1795 double opr0 = 0.0, opr1 = 0.0;
1810 switch (FInfo.getId()) {
1820 Res0 =
APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
1833 Res0 =
APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
1846 Res0 =
APFloat{(log(opr0 + 1.0) - log(opr0 - 1.0)) / 2.0};
1855 APFloat{(opr0 < 0.0) ? -
pow(-opr0, 1.0 / 3.0) :
pow(opr0, 1.0 / 3.0)};
1887 Res0 =
APFloat{log(opr0) / log(2.0)};
1891 Res0 =
APFloat{log(opr0) / log(10.0)};
1895 Res0 =
APFloat{1.0 / sqrt(opr0)};
1930 double val = (double)iopr1->getSExtValue();
1939 double val = (double)iopr1->getSExtValue();
1956bool AMDGPULibCalls::evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo) {
1957 int numArgs = (int)aCI->
arg_size();
1981 if (FuncVecSize == 1) {
1982 if (!evaluateScalarMathFunc(FInfo, Val0.
emplace_back(0.0),
1989 for (
int i = 0; i < FuncVecSize; ++i) {
1992 if (!evaluateScalarMathFunc(FInfo, Val0.
emplace_back(0.0),
2000 if (FuncVecSize == 1) {
2001 nval0 = ConstantFP::get(aCI->
getType(), Val0[0]);
2003 nval1 = ConstantFP::get(aCI->
getType(), Val1[0]);
2010 if (hasTwoResults) {
2013 "math function with ptr arg not supported yet");
2024 Simplifier.initNativeFuncs();
2029 F.printAsOperand(
dbgs(),
false,
F.getParent());
dbgs() <<
'\n';);
2031 for (
auto &BB :
F) {
2038 if (Simplifier.fold(CI))
2052 Simplifier.initNativeFuncs();
2055 for (
auto &BB :
F) {
2060 if (CI && Simplifier.useNative(CI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static Constant * getConstantFloatVector(const ArrayRef< APFloat > Values, const Type *Ty)
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Check Debug Module
FunctionAnalysisManager FAM
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
Param * getLeads()
Get leading parameters for mangled lib functions.
ENamePrefix getPrefix() const
static constexpr roundingMode rmNearestTiesToEven
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Utility class for floating point operations which can have information about relaxed accuracy require...
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto dyn_cast_or_null(const Y &Val)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.