LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Statepoint.h"
56#include "llvm/IR/Type.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/IR/ValueHandle.h"
64#include "llvm/Support/Debug.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <optional>
79#include <utility>
80#include <vector>
81
82#define DEBUG_TYPE "instcombine"
84
85using namespace llvm;
86using namespace PatternMatch;
87
88STATISTIC(NumSimplified, "Number of library calls simplified");
89
91 "instcombine-guard-widening-window",
92 cl::init(3),
93 cl::desc("How wide an instruction window to bypass looking for "
94 "another guard"));
95
96/// Return the specified type promoted as it would be to pass through a va_arg
97/// area.
99 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
100 if (ITy->getBitWidth() < 32)
101 return Type::getInt32Ty(Ty->getContext());
102 }
103 return Ty;
104}
105
106/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107/// TODO: This should probably be integrated with visitAllocSites, but that
108/// requires a deeper change to allow either unread or unwritten objects.
110 auto *Src = MI->getRawSource();
111 while (isa<GetElementPtrInst>(Src)) {
112 if (!Src->hasOneUse())
113 return false;
114 Src = cast<Instruction>(Src)->getOperand(0);
115 }
116 return isa<AllocaInst>(Src) && Src->hasOneUse();
117}
118
120 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
121 MaybeAlign CopyDstAlign = MI->getDestAlign();
122 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
123 MI->setDestAlignment(DstAlign);
124 return MI;
125 }
126
127 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
128 MaybeAlign CopySrcAlign = MI->getSourceAlign();
129 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
130 MI->setSourceAlignment(SrcAlign);
131 return MI;
132 }
133
134 // If we have a store to a location which is known constant, we can conclude
135 // that the store must be storing the constant value (else the memory
136 // wouldn't be constant), and this must be a noop.
137 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
138 // Set the size of the copy to 0, it will be deleted on the next iteration.
139 MI->setLength((uint64_t)0);
140 return MI;
141 }
142
143 // If the source is provably undef, the memcpy/memmove doesn't do anything
144 // (unless the transfer is volatile).
145 if (hasUndefSource(MI) && !MI->isVolatile()) {
146 // Set the size of the copy to 0, it will be deleted on the next iteration.
147 MI->setLength((uint64_t)0);
148 return MI;
149 }
150
151 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152 // load/store.
153 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
154 if (!MemOpLength) return nullptr;
155
156 // Source and destination pointer types are always "i8*" for intrinsic. See
157 // if the size is something we can handle with a single primitive load/store.
158 // A single load+store correctly handles overlapping memory in the memmove
159 // case.
160 uint64_t Size = MemOpLength->getLimitedValue();
161 assert(Size && "0-sized memory transferring should be removed already.");
162
163 if (Size > 8 || (Size&(Size-1)))
164 return nullptr; // If not 1/2/4/8 bytes, exit.
165
166 // If it is an atomic and alignment is less than the size then we will
167 // introduce the unaligned memory access which will be later transformed
168 // into libcall in CodeGen. This is not evident performance gain so disable
169 // it now.
170 if (MI->isAtomic())
171 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
172 return nullptr;
173
174 // Use an integer load+store unless we can find something better.
175 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
176
177 // If the memcpy has metadata describing the members, see if we can get the
178 // TBAA, scope and noalias tags describing our copy.
179 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
180
181 Value *Src = MI->getArgOperand(1);
182 Value *Dest = MI->getArgOperand(0);
183 LoadInst *L = Builder.CreateLoad(IntType, Src);
184 // Alignment from the mem intrinsic will be better, so use it.
185 L->setAlignment(*CopySrcAlign);
186 L->setAAMetadata(AACopyMD);
187 MDNode *LoopMemParallelMD =
188 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
189 if (LoopMemParallelMD)
190 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
191 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
192 if (AccessGroupMD)
193 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
194
195 StoreInst *S = Builder.CreateStore(L, Dest);
196 // Alignment from the mem intrinsic will be better, so use it.
197 S->setAlignment(*CopyDstAlign);
198 S->setAAMetadata(AACopyMD);
199 if (LoopMemParallelMD)
200 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
201 if (AccessGroupMD)
202 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
203 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
204
205 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
206 // non-atomics can be volatile
207 L->setVolatile(MT->isVolatile());
208 S->setVolatile(MT->isVolatile());
209 }
210 if (MI->isAtomic()) {
211 // atomics have to be unordered
212 L->setOrdering(AtomicOrdering::Unordered);
214 }
215
216 // Set the size of the copy to 0, it will be deleted on the next iteration.
217 MI->setLength((uint64_t)0);
218 return MI;
219}
220
222 const Align KnownAlignment =
223 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
224 MaybeAlign MemSetAlign = MI->getDestAlign();
225 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
226 MI->setDestAlignment(KnownAlignment);
227 return MI;
228 }
229
230 // If we have a store to a location which is known constant, we can conclude
231 // that the store must be storing the constant value (else the memory
232 // wouldn't be constant), and this must be a noop.
233 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
234 // Set the size of the copy to 0, it will be deleted on the next iteration.
235 MI->setLength((uint64_t)0);
236 return MI;
237 }
238
239 // Remove memset with an undef value.
240 // FIXME: This is technically incorrect because it might overwrite a poison
241 // value. Change to PoisonValue once #52930 is resolved.
242 if (isa<UndefValue>(MI->getValue())) {
243 // Set the size of the copy to 0, it will be deleted on the next iteration.
244 MI->setLength((uint64_t)0);
245 return MI;
246 }
247
248 // Extract the length and alignment and fill if they are constant.
249 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
250 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
251 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
252 return nullptr;
253 const uint64_t Len = LenC->getLimitedValue();
254 assert(Len && "0-sized memory setting should be removed already.");
255 const Align Alignment = MI->getDestAlign().valueOrOne();
256
257 // If it is an atomic and alignment is less than the size then we will
258 // introduce the unaligned memory access which will be later transformed
259 // into libcall in CodeGen. This is not evident performance gain so disable
260 // it now.
261 if (MI->isAtomic() && Alignment < Len)
262 return nullptr;
263
264 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
265 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
266 Value *Dest = MI->getDest();
267
268 // Extract the fill value and store.
269 Constant *FillVal = ConstantInt::get(
270 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
271 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
272 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
273 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
274 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
275 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
276 }
277
278 S->setAlignment(Alignment);
279 if (MI->isAtomic())
281
282 // Set the size of the copy to 0, it will be deleted on the next iteration.
283 MI->setLength((uint64_t)0);
284 return MI;
285 }
286
287 return nullptr;
288}
289
290// TODO, Obvious Missing Transforms:
291// * Narrow width by halfs excluding zero/undef lanes
292Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293 Value *LoadPtr = II.getArgOperand(0);
294 const Align Alignment = II.getParamAlign(0).valueOrOne();
295 Value *Mask = II.getArgOperand(1);
296
297 // If the mask is all ones or poison, this is a plain vector load of the 1st
298 // argument.
299 if (match(Mask, m_AllOnesOrPoison())) {
300 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
301 "unmaskedload");
302 L->copyMetadata(II);
303 return L;
304 }
305
306 // If we can unconditionally load from this address, replace with a
307 // load/select idiom. TODO: use DT for context sensitive query
308 if (isDereferenceablePointer(LoadPtr, II.getType(),
309 II.getDataLayout(), &II, &AC)) {
310 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
311 "unmaskedload");
312 LI->copyMetadata(II);
313 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
314 }
315
316 return nullptr;
317}
318
319// TODO, Obvious Missing Transforms:
320// * Single constant active lane -> store
321// * Narrow width by halfs excluding zero/undef lanes
322Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
323 Value *StorePtr = II.getArgOperand(1);
324 Align Alignment = II.getParamAlign(1).valueOrOne();
325 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
326 if (!ConstMask)
327 return nullptr;
328
329 // If the mask is all zeros or poison, this instruction does nothing.
330 if (match(ConstMask, m_ZeroOrPoison()))
332
333 // If the mask is all ones or poison, this is a plain vector store of the 1st
334 // argument.
335 if (match(ConstMask, m_AllOnesOrPoison())) {
336 StoreInst *S =
337 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
338 S->copyMetadata(II);
339 return S;
340 }
341
342 if (isa<ScalableVectorType>(ConstMask->getType()))
343 return nullptr;
344
345 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
346 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
347 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
348 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
349 PoisonElts))
350 return replaceOperand(II, 0, V);
351
352 return nullptr;
353}
354
355// TODO, Obvious Missing Transforms:
356// * Single constant active lane load -> load
357// * Dereferenceable address & few lanes -> scalarize speculative load/selects
358// * Adjacent vector addresses -> masked.load
359// * Narrow width by halfs excluding zero/undef lanes
360// * Vector incrementing address -> vector masked load
361Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
362 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
363 if (!ConstMask)
364 return nullptr;
365
366 // Vector splat address w/known mask -> scalar load
367 // Fold the gather to load the source vector first lane
368 // because it is reloading the same value each time
369 if (ConstMask->isAllOnesValue())
370 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
371 auto *VecTy = cast<VectorType>(II.getType());
372 const Align Alignment = II.getParamAlign(0).valueOrOne();
373 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
374 Alignment, "load.scalar");
375 Value *Shuf =
376 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
378 }
379
380 return nullptr;
381}
382
383// TODO, Obvious Missing Transforms:
384// * Single constant active lane -> store
385// * Adjacent vector addresses -> masked.store
386// * Narrow store width by halfs excluding zero/undef lanes
387// * Vector incrementing address -> vector masked store
388Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
389 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
390 if (!ConstMask)
391 return nullptr;
392
393 // If the mask is all zeros or poison, a scatter does nothing.
394 if (match(ConstMask, m_ZeroOrPoison()))
396
397 // Vector splat address -> scalar store
398 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
399 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
400 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
401 if (maskContainsAllOneOrUndef(ConstMask)) {
402 Align Alignment = II.getParamAlign(1).valueOrOne();
403 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
404 Alignment);
405 S->copyMetadata(II);
406 return S;
407 }
408 }
409 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
410 // lastlane), ptr
411 if (ConstMask->isAllOnesValue()) {
412 Align Alignment = II.getParamAlign(1).valueOrOne();
413 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
414 ElementCount VF = WideLoadTy->getElementCount();
415 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
416 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
417 Value *Extract =
418 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
419 StoreInst *S =
420 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
421 S->copyMetadata(II);
422 return S;
423 }
424 }
425 if (isa<ScalableVectorType>(ConstMask->getType()))
426 return nullptr;
427
428 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
429 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
430 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
431 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
432 PoisonElts))
433 return replaceOperand(II, 0, V);
434 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
435 PoisonElts))
436 return replaceOperand(II, 1, V);
437
438 return nullptr;
439}
440
441/// This function transforms launder.invariant.group and strip.invariant.group
442/// like:
443/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
444/// launder(strip(%x)) -> launder(%x)
445/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
446/// strip(launder(%x)) -> strip(%x)
447/// This is legal because it preserves the most recent information about
448/// the presence or absence of invariant.group.
450 InstCombinerImpl &IC) {
451 auto *Arg = II.getArgOperand(0);
452 auto *StrippedArg = Arg->stripPointerCasts();
453 auto *StrippedInvariantGroupsArg = StrippedArg;
454 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
455 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
456 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
457 break;
458 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
459 }
460 if (StrippedArg == StrippedInvariantGroupsArg)
461 return nullptr; // No launders/strips to remove.
462
463 Value *Result = nullptr;
464
465 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
466 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
467 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
468 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
469 else
471 "simplifyInvariantGroupIntrinsic only handles launder and strip");
472 if (Result->getType()->getPointerAddressSpace() !=
473 II.getType()->getPointerAddressSpace())
474 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
475
476 return cast<Instruction>(Result);
477}
478
480 assert((II.getIntrinsicID() == Intrinsic::cttz ||
481 II.getIntrinsicID() == Intrinsic::ctlz) &&
482 "Expected cttz or ctlz intrinsic");
483 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
484 Value *Op0 = II.getArgOperand(0);
485 Value *Op1 = II.getArgOperand(1);
486 Value *X;
487 // ctlz(bitreverse(x)) -> cttz(x)
488 // cttz(bitreverse(x)) -> ctlz(x)
489 if (match(Op0, m_BitReverse(m_Value(X)))) {
490 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
491 Function *F =
492 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
493 return CallInst::Create(F, {X, II.getArgOperand(1)});
494 }
495
496 if (II.getType()->isIntOrIntVectorTy(1)) {
497 // ctlz/cttz i1 Op0 --> not Op0
498 if (match(Op1, m_Zero()))
499 return BinaryOperator::CreateNot(Op0);
500 // If zero is poison, then the input can be assumed to be "true", so the
501 // instruction simplifies to "false".
502 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
503 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
504 }
505
506 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
507 if (II.hasOneUse() && match(Op1, m_Zero()) &&
508 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
509 II.dropUBImplyingAttrsAndMetadata();
510 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
511 }
512
513 Constant *C;
514
515 if (IsTZ) {
516 // cttz(-x) -> cttz(x)
517 if (match(Op0, m_Neg(m_Value(X))))
518 return IC.replaceOperand(II, 0, X);
519
520 // cttz(-x & x) -> cttz(x)
521 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
522 return IC.replaceOperand(II, 0, X);
523
524 // cttz(sext(x)) -> cttz(zext(x))
525 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
526 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
527 auto *CttzZext =
528 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
529 return IC.replaceInstUsesWith(II, CttzZext);
530 }
531
532 // Zext doesn't change the number of trailing zeros, so narrow:
533 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
534 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
535 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
536 IC.Builder.getTrue());
537 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
538 return IC.replaceInstUsesWith(II, ZextCttz);
539 }
540
541 // cttz(abs(x)) -> cttz(x)
542 // cttz(nabs(x)) -> cttz(x)
543 Value *Y;
545 if (SPF == SPF_ABS || SPF == SPF_NABS)
546 return IC.replaceOperand(II, 0, X);
547
549 return IC.replaceOperand(II, 0, X);
550
551 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
552 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
553 match(Op1, m_One())) {
554 Value *ConstCttz =
555 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
556 return BinaryOperator::CreateAdd(ConstCttz, X);
557 }
558
559 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
560 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
561 match(Op1, m_One())) {
562 Value *ConstCttz =
563 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
564 return BinaryOperator::CreateSub(ConstCttz, X);
565 }
566
567 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
568 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
569 Value *Width =
570 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
571 return BinaryOperator::CreateSub(Width, X);
572 }
573 } else {
574 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
575 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
576 match(Op1, m_One())) {
577 Value *ConstCtlz =
578 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
579 return BinaryOperator::CreateAdd(ConstCtlz, X);
580 }
581
582 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
583 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
584 match(Op1, m_One())) {
585 Value *ConstCtlz =
586 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
587 return BinaryOperator::CreateSub(ConstCtlz, X);
588 }
589
590 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
591 if (Op0->hasOneUse() &&
592 match(Op0,
594 Type *Ty = II.getType();
595 unsigned BitWidth = Ty->getScalarSizeInBits();
596 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
597 {X, IC.Builder.getFalse()});
598 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
599 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
600 }
601 }
602
603 // cttz(Pow2) -> Log2(Pow2)
604 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
605 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
606 if (IsTZ)
607 return IC.replaceInstUsesWith(II, R);
608 BinaryOperator *BO = BinaryOperator::CreateSub(
609 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
610 R);
611 BO->setHasNoSignedWrap();
613 return BO;
614 }
615
616 KnownBits Known = IC.computeKnownBits(Op0, &II);
617
618 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
619 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
620 : Known.countMaxLeadingZeros();
621 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
622 : Known.countMinLeadingZeros();
623
624 // If all bits above (ctlz) or below (cttz) the first known one are known
625 // zero, this value is constant.
626 // FIXME: This should be in InstSimplify because we're replacing an
627 // instruction with a constant.
628 if (PossibleZeros == DefiniteZeros) {
629 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
630 return IC.replaceInstUsesWith(II, C);
631 }
632
633 // If the input to cttz/ctlz is known to be non-zero,
634 // then change the 'ZeroIsPoison' parameter to 'true'
635 // because we know the zero behavior can't affect the result.
636 if (!Known.One.isZero() ||
638 if (!match(II.getArgOperand(1), m_One()))
639 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
640 }
641
642 // Add range attribute since known bits can't completely reflect what we know.
643 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
644 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
645 !II.getMetadata(LLVMContext::MD_range)) {
646 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
647 APInt(BitWidth, PossibleZeros + 1));
648 II.addRangeRetAttr(Range);
649 return &II;
650 }
651
652 return nullptr;
653}
654
656 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
657 "Expected ctpop intrinsic");
658 Type *Ty = II.getType();
659 unsigned BitWidth = Ty->getScalarSizeInBits();
660 Value *Op0 = II.getArgOperand(0);
661 Value *X, *Y;
662
663 // ctpop(bitreverse(x)) -> ctpop(x)
664 // ctpop(bswap(x)) -> ctpop(x)
665 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
666 return IC.replaceOperand(II, 0, X);
667
668 // ctpop(rot(x)) -> ctpop(x)
669 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
670 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
671 X == Y)
672 return IC.replaceOperand(II, 0, X);
673
674 // ctpop(x | -x) -> bitwidth - cttz(x, false)
675 if (Op0->hasOneUse() &&
676 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
677 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
678 {X, IC.Builder.getFalse()});
679 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
680 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
681 }
682
683 // ctpop(~x & (x - 1)) -> cttz(x, false)
684 if (match(Op0,
686 Function *F =
687 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
688 return CallInst::Create(F, {X, IC.Builder.getFalse()});
689 }
690
691 // Zext doesn't change the number of set bits, so narrow:
692 // ctpop (zext X) --> zext (ctpop X)
693 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
694 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
695 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
696 }
697
698 KnownBits Known(BitWidth);
699 IC.computeKnownBits(Op0, Known, &II);
700
701 // If all bits are zero except for exactly one fixed bit, then the result
702 // must be 0 or 1, and we can get that answer by shifting to LSB:
703 // ctpop (X & 32) --> (X & 32) >> 5
704 // TODO: Investigate removing this as its likely unnecessary given the below
705 // `isKnownToBeAPowerOfTwo` check.
706 if ((~Known.Zero).isPowerOf2())
707 return BinaryOperator::CreateLShr(
708 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
709
710 // More generally we can also handle non-constant power of 2 patterns such as
711 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
712 // ctpop(Pow2OrZero) --> icmp ne X, 0
713 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
714 return CastInst::Create(Instruction::ZExt,
717 Ty);
718
719 // Add range attribute since known bits can't completely reflect what we know.
720 if (BitWidth != 1) {
721 ConstantRange OldRange =
722 II.getRange().value_or(ConstantRange::getFull(BitWidth));
723
724 unsigned Lower = Known.countMinPopulation();
725 unsigned Upper = Known.countMaxPopulation() + 1;
726
727 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
729 Lower = 1;
730
732 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
733
734 if (Range != OldRange) {
735 II.addRangeRetAttr(Range);
736 return &II;
737 }
738 }
739
740 return nullptr;
741}
742
743/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
744/// at most two source operands are actually referenced.
746 bool IsExtension) {
747 // Bail out if the mask is not a constant.
748 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
749 if (!C)
750 return nullptr;
751
752 auto *RetTy = cast<FixedVectorType>(II.getType());
753 unsigned NumIndexes = RetTy->getNumElements();
754
755 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
756 if (!RetTy->getElementType()->isIntegerTy(8) ||
757 (NumIndexes != 8 && NumIndexes != 16))
758 return nullptr;
759
760 // For tbx instructions, the first argument is the "fallback" vector, which
761 // has the same length as the mask and return type.
762 unsigned int StartIndex = (unsigned)IsExtension;
763 auto *SourceTy =
764 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
765 // Note that the element count of each source vector does *not* need to be the
766 // same as the element count of the return type and mask! All source vectors
767 // must have the same element count as each other, though.
768 unsigned NumElementsPerSource = SourceTy->getNumElements();
769
770 // There are no tbl/tbx intrinsics for which the destination size exceeds the
771 // source size. However, our definitions of the intrinsics, at least in
772 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
773 // *could* technically happen.
774 if (NumIndexes > NumElementsPerSource)
775 return nullptr;
776
777 // The tbl/tbx intrinsics take several source operands followed by a mask
778 // operand.
779 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
780
781 // Map input operands to shuffle indices. This also helpfully deduplicates the
782 // input arguments, in case the same value is passed as an argument multiple
783 // times.
784 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
785 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
786 PoisonValue::get(SourceTy)};
787
788 int Indexes[16];
789 for (unsigned I = 0; I < NumIndexes; ++I) {
790 Constant *COp = C->getAggregateElement(I);
791
792 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
793 return nullptr;
794
795 if (isa<UndefValue>(COp)) {
796 Indexes[I] = -1;
797 continue;
798 }
799
800 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
801 // The index of the input argument that this index references (0 = first
802 // source argument, etc).
803 unsigned SourceOperandIndex = Index / NumElementsPerSource;
804 // The index of the element at that source operand.
805 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
806
807 Value *SourceOperand;
808 if (SourceOperandIndex >= NumSourceOperands) {
809 // This index is out of bounds. Map it to index into either the fallback
810 // vector (tbx) or vector of zeroes (tbl).
811 SourceOperandIndex = NumSourceOperands;
812 if (IsExtension) {
813 // For out-of-bounds indices in tbx, choose the `I`th element of the
814 // fallback.
815 SourceOperand = II.getArgOperand(0);
816 SourceOperandElementIndex = I;
817 } else {
818 // Otherwise, choose some element from the dummy vector of zeroes (we'll
819 // always choose the first).
820 SourceOperand = Constant::getNullValue(SourceTy);
821 SourceOperandElementIndex = 0;
822 }
823 } else {
824 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
825 }
826
827 // The source operand may be the fallback vector, which may not have the
828 // same number of elements as the source vector. In that case, we *could*
829 // choose to extend its length with another shufflevector, but it's simpler
830 // to just bail instead.
831 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
832 NumElementsPerSource)
833 return nullptr;
834
835 // We now know the source operand referenced by this index. Make it a
836 // shufflevector operand, if it isn't already.
837 unsigned NumSlots = ValueToShuffleSlot.size();
838 // This shuffle references more than two sources, and hence cannot be
839 // represented as a shufflevector.
840 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
841 return nullptr;
842
843 auto [It, Inserted] =
844 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
845 if (Inserted)
846 ShuffleOperands[It->getSecond()] = SourceOperand;
847
848 unsigned RemappedIndex =
849 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
850 Indexes[I] = RemappedIndex;
851 }
852
854 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
855 return IC.replaceInstUsesWith(II, Shuf);
856}
857
858// Returns true iff the 2 intrinsics have the same operands, limiting the
859// comparison to the first NumOperands.
860static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
861 unsigned NumOperands) {
862 assert(I.arg_size() >= NumOperands && "Not enough operands");
863 assert(E.arg_size() >= NumOperands && "Not enough operands");
864 for (unsigned i = 0; i < NumOperands; i++)
865 if (I.getArgOperand(i) != E.getArgOperand(i))
866 return false;
867 return true;
868}
869
870// Remove trivially empty start/end intrinsic ranges, i.e. a start
871// immediately followed by an end (ignoring debuginfo or other
872// start/end intrinsics in between). As this handles only the most trivial
873// cases, tracking the nesting level is not needed:
874//
875// call @llvm.foo.start(i1 0)
876// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
877// call @llvm.foo.end(i1 0)
878// call @llvm.foo.end(i1 0) ; &I
//
// IsStart is the caller-supplied predicate that recognizes a start intrinsic.
// Returns true when a start with the same operands as EndI was found and EndI
// was erased; returns false when the backwards scan stopped without a match.
879static bool
// NOTE(review): this listing jumps from line 879 to 881; the line carrying the
// function name and the EndI / IC parameters is not visible here -- confirm
// against the upstream file.
881 std::function<bool(const IntrinsicInst &)> IsStart) {
882 // We start from the end intrinsic and scan backwards, so that InstCombine
883 // has already processed (and potentially removed) all the instructions
884 // before the end intrinsic.
885 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
886 for (; BI != BE; ++BI) {
887 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
 // Debug/pseudo instructions and other end intrinsics of the same kind are
 // stepped over.
888 if (I->isDebugOrPseudoInst() ||
889 I->getIntrinsicID() == EndI.getIntrinsicID())
890 continue;
891 if (IsStart(*I)) {
892 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
 // NOTE(review): listing line 893 is not visible here; presumably the
 // matching start intrinsic is erased on that line -- confirm.
894 IC.eraseInstFromFunction(EndI);
895 return true;
896 }
897 // Skip start intrinsics that don't pair with this end intrinsic.
898 continue;
899 }
900 }
 // Anything else (a non-intrinsic, or an unrelated intrinsic) ends the scan.
901 break;
902 }
903
904 return false;
905}
906
// Handles a va_end (per the comment below): delegates to
// removeTriviallyEmptyRange with a predicate that treats va_start, and va_copy
// under the condition below, as the matching "start". Always returns nullptr.
// NOTE(review): the declaration line (listing line 907) is not visible in this
// chunk -- confirm the signature against the upstream file.
908 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
909 // Bail out on the case where the source va_list of a va_copy is destroyed
910 // immediately by a follow-up va_end.
911 return II.getIntrinsicID() == Intrinsic::vastart ||
912 (II.getIntrinsicID() == Intrinsic::vacopy &&
913 I.getArgOperand(0) != II.getArgOperand(1));
914 });
 // The boolean result of removeTriviallyEmptyRange is not inspected here.
915 return nullptr;
916}
917
// Canonicalizes a call whose first argument is a constant while its second is
// not, by swapping the two arguments in place. Returns &Call when a swap was
// performed and nullptr otherwise.
// NOTE(review): the declaration line (listing line 918) is not visible in this
// chunk -- confirm the signature against the upstream file.
919 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
920 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
 // Only swap when exactly the first argument is constant; two constants or two
 // non-constants are left untouched.
921 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
922 Call.setArgOperand(0, Arg1);
923 Call.setArgOperand(1, Arg0);
924 return &Call;
925 }
926 return nullptr;
927}
928
929/// Creates a result tuple for an overflow intrinsic \p II with a given
930/// \p Result and a constant \p Overflow value.
// NOTE(review): listing line 931 (the start of the declaration, including the
// leading parameters) is not visible in this chunk -- confirm upstream.
932 Constant *Overflow) {
 // Build the constant struct {poison, Overflow} first, then insert the
 // (possibly non-constant) Result at index 0 with an insertvalue.
933 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
934 StructType *ST = cast<StructType>(II->getType());
935 Constant *Struct = ConstantStruct::get(ST, V);
936 return InsertValueInst::Create(Struct, Result, 0);
937}
938
// Tries to simplify a with.overflow intrinsic in two ways:
//  1. via OptimizeOverflowCheck, which may yield both the arithmetic result and
//     a constant overflow bit;
//  2. via an assumption stating that the extracted overflow bit (extractvalue
//     index 1) is false, in which case the operation is re-emitted as a plain
//     binary operator carrying the corresponding no-wrap flag.
// Returns the replacement built by createOverflowTuple, or nullptr when neither
// applies.
// NOTE(review): listing line 939 (presumably the return type) is not visible in
// this chunk -- confirm against the upstream file.
940InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
941 WithOverflowInst *WO = cast<WithOverflowInst>(II);
942 Value *OperationResult = nullptr;
943 Constant *OverflowResult = nullptr;
944 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
945 WO->getRHS(), *WO, OperationResult, OverflowResult))
946 return createOverflowTuple(WO, OperationResult, OverflowResult);
947
948 // See whether we can optimize the overflow check with assumption information.
949 for (User *U : WO->users()) {
 // Only users that extract the overflow bit are of interest.
950 if (!match(U, m_ExtractValue<1>(m_Value())))
951 continue;
952
953 for (auto &AssumeVH : AC.assumptionsFor(U)) {
 // Skip handles that no longer refer to a value.
954 if (!AssumeVH)
955 continue;
956 CallInst *I = cast<CallInst>(AssumeVH);
 // The assumption must be exactly "not U", i.e. "no overflow happened".
957 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
958 continue;
959 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
960 /*AllowEphemerals=*/true))
961 continue;
962 Value *Result =
963 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
964 Result->takeName(WO);
 // The builder may have folded the operation to a non-instruction; flags can
 // only be attached to an Instruction.
965 if (auto *Inst = dyn_cast<Instruction>(Result)) {
966 if (WO->isSigned())
967 Inst->setHasNoSignedWrap();
968 else
969 Inst->setHasNoUnsignedWrap();
970 }
971 return createOverflowTuple(WO, Result,
972 ConstantInt::getFalse(U->getType()));
973 }
974 }
975
976 return nullptr;
977}
978
979static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
980 Ty = Ty->getScalarType();
981 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
982}
983
984static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
985 Ty = Ty->getScalarType();
986 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
987}
988
989/// \returns the compare predicate type if the test performed by
990/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
991/// floating-point environment assumed for \p F for type \p Ty
// Each case below is only valid under one specific input denormal mode: the
// masks that leave subnormals on the non-zero side require IEEE handling, the
// masks that group subnormals with zero require denormals-are-zero.
// NOTE(review): listing line 992 (the start of the declaration, including the
// Mask parameter) is not visible in this chunk -- confirm upstream.
993 const Function &F, Type *Ty) {
994 switch (static_cast<unsigned>(Mask)) {
995 case fcZero:
996 if (inputDenormalIsIEEE(F, Ty))
997 return FCmpInst::FCMP_OEQ;
998 break;
999 case fcZero | fcSubnormal:
1000 if (inputDenormalIsDAZ(F, Ty))
1001 return FCmpInst::FCMP_OEQ;
1002 break;
1003 case fcPositive | fcNegZero:
1004 if (inputDenormalIsIEEE(F, Ty))
1005 return FCmpInst::FCMP_OGE;
1006 break;
 // NOTE(review): listing line 1007 is not visible; presumably the case label
 // for the DAZ variant of the "oge" test -- confirm upstream.
1008 if (inputDenormalIsDAZ(F, Ty))
1009 return FCmpInst::FCMP_OGE;
1010 break;
 // NOTE(review): listing line 1011 is not visible; presumably the case label
 // for the IEEE variant of the "ogt" test -- confirm upstream.
1012 if (inputDenormalIsIEEE(F, Ty))
1013 return FCmpInst::FCMP_OGT;
1014 break;
1015 case fcNegative | fcPosZero:
1016 if (inputDenormalIsIEEE(F, Ty))
1017 return FCmpInst::FCMP_OLE;
1018 break;
 // NOTE(review): listing line 1019 is not visible; presumably the case label
 // for the DAZ variant of the "ole" test -- confirm upstream.
1020 if (inputDenormalIsDAZ(F, Ty))
1021 return FCmpInst::FCMP_OLE;
1022 break;
 // NOTE(review): listing line 1023 is not visible; presumably the case label
 // for the IEEE variant of the "olt" test -- confirm upstream.
1024 if (inputDenormalIsIEEE(F, Ty))
1025 return FCmpInst::FCMP_OLT;
1026 break;
1027 case fcPosNormal | fcPosInf:
1028 if (inputDenormalIsDAZ(F, Ty))
1029 return FCmpInst::FCMP_OGT;
1030 break;
1031 case fcNegNormal | fcNegInf:
1032 if (inputDenormalIsDAZ(F, Ty))
1033 return FCmpInst::FCMP_OLT;
1034 break;
1035 case ~fcZero & ~fcNan:
1036 if (inputDenormalIsIEEE(F, Ty))
1037 return FCmpInst::FCMP_ONE;
1038 break;
1039 case ~(fcZero | fcSubnormal) & ~fcNan:
1040 if (inputDenormalIsDAZ(F, Ty))
1041 return FCmpInst::FCMP_ONE;
1042 break;
1043 default:
1044 break;
1045 }
1046
 // NOTE(review): listing line 1047 is not visible; presumably the fall-through
 // return of a "no equivalent predicate" value -- confirm upstream.
1048}
1049
// Simplifies llvm.is.fpclass(Src0, Mask), where the mask operand is a constant.
// In order, it tries to:
//  - look through fneg / fabs on the source by adjusting the mask;
//  - turn infinity, NaN and zero-relative tests into ordinary fcmp instructions
//    (skipped when the enclosing function has the StrictFP attribute);
//  - drop mask bits that the known FP class of the source rules out;
//  - fold to true when the mask covers every class the source can have.
// Returns the instruction to continue with, or nullptr when nothing changed.
1050Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1051 Value *Src0 = II.getArgOperand(0);
1052 Value *Src1 = II.getArgOperand(1);
1053 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1054 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
 // IsUnordered: both NaN bits are tested. IsOrdered: neither NaN bit is tested.
 // A mask testing only one of the two NaN bits satisfies neither.
1055 const bool IsUnordered = (Mask & fcNan) == fcNan;
1056 const bool IsOrdered = (Mask & fcNan) == fcNone;
1057 const FPClassTest OrderedMask = Mask & ~fcNan;
1058 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1059
1060 const bool IsStrict =
1061 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1062
1063 Value *FNegSrc;
1064 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1065 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1066
1067 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1068 return replaceOperand(II, 0, FNegSrc);
1069 }
1070
1071 Value *FAbsSrc;
1072 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
 // is.fpclass (fabs x), mask -> is.fpclass x, (inverse_fabs mask)
1073 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1074 return replaceOperand(II, 0, FAbsSrc);
1075 }
1076
1077 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1078 (IsOrdered || IsUnordered) && !IsStrict) {
1079 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1080 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1081 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1082 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
 // NOTE(review): listing line 1083 is not visible; presumably it defines the
 // Inf constant used below -- confirm upstream.
1084 FCmpInst::Predicate Pred =
1085 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1086 if (OrderedInvertedMask == fcInf)
1087 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1088
1089 Value *Fabs = Builder.CreateFAbs(Src0);
1090 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1091 CmpInf->takeName(&II);
1092 return replaceInstUsesWith(II, CmpInf);
1093 }
1094
1095 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1096 (IsOrdered || IsUnordered) && !IsStrict) {
1097 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1098 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1099 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1100 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1101 Constant *Inf =
1102 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1103 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1104 : Builder.CreateFCmpOEQ(Src0, Inf);
1105
1106 EqInf->takeName(&II);
1107 return replaceInstUsesWith(II, EqInf);
1108 }
1109
1110 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1111 (IsOrdered || IsUnordered) && !IsStrict) {
1112 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1113 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1114 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1115 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
 // NOTE(review): listing line 1116 is not visible; presumably the start of the
 // Inf constant definition that continues on the next line -- confirm upstream.
1117 OrderedInvertedMask == fcNegInf);
1118 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1119 : Builder.CreateFCmpONE(Src0, Inf);
1120 NeInf->takeName(&II);
1121 return replaceInstUsesWith(II, NeInf);
1122 }
1123
1124 if (Mask == fcNan && !IsStrict) {
1125 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1126 // exceptions.
1127 Value *IsNan =
1128 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1129 IsNan->takeName(&II);
1130 return replaceInstUsesWith(II, IsNan);
1131 }
1132
1133 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1134 // Equivalent of !isnan. Replace with standard fcmp.
1135 Value *FCmp =
1136 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1137 FCmp->takeName(&II);
1138 return replaceInstUsesWith(II, FCmp);
1139 }
1140
 // NOTE(review): listing line 1141 is not visible; presumably it declares the
 // PredType variable assigned in the condition below -- confirm upstream.
1142
1143 // Try to replace with an fcmp with 0
1144 //
1145 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1146 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1147 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1148 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1149 //
1150 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1151 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1152 //
1153 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1154 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1155 //
1156 if (!IsStrict && (IsOrdered || IsUnordered) &&
1157 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1158 Src0->getType())) !=
 // NOTE(review): listing lines 1159-1160 are not visible; presumably the
 // right-hand side of the comparison above and the definition of the Zero
 // constant used below -- confirm upstream.
1161 // Equivalent of == 0.
1162 Value *FCmp = Builder.CreateFCmp(
1163 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1164 Src0, Zero);
1165
1166 FCmp->takeName(&II);
1167 return replaceInstUsesWith(II, FCmp);
1168 }
1169
1170 KnownFPClass Known =
1171 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1172
1173 // Clear test bits we know must be false from the source value.
1174 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1175 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1176 if ((Mask & Known.KnownFPClasses) != Mask) {
1177 II.setArgOperand(
1178 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1179 return &II;
1180 }
1181
1182 // If none of the tests which can return false are possible, fold to true.
1183 // fp_class (nnan x), ~(qnan|snan) -> true
1184 // fp_class (ninf x), ~(ninf|pinf) -> true
1185 if (Mask == Known.KnownFPClasses)
1186 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1187
1188 return nullptr;
1189}
1190
// Determines the sign of Op from its known bits: returns false when Op is known
// non-negative, true when it is known negative, and std::nullopt when neither
// the known bits nor the nsw-sub check below settle it.
1191static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1192 KnownBits Known = computeKnownBits(Op, SQ);
1193 if (Known.isNonNegative())
1194 return false;
1195 if (Known.isNegative())
1196 return true;
1197
1198 Value *X, *Y;
1199 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
 // NOTE(review): listing line 1200 is not visible; it is the statement guarded
 // by the `if` above (presumably a return derived from how X and Y compare) --
 // confirm against the upstream file.
1201
1202 return std::nullopt;
1203}
1204
// Like getKnownSign, which is consulted first and whose answer is returned
// unchanged when it has one. Otherwise an nsw-sub operand gets one further
// check before giving up with std::nullopt.
1205static std::optional<bool> getKnownSignOrZero(Value *Op,
1206 const SimplifyQuery &SQ) {
1207 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1208 return Sign;
1209
1210 Value *X, *Y;
1211 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
 // NOTE(review): listing line 1212 is not visible; it is the statement guarded
 // by the `if` above (presumably a return derived from how X and Y compare,
 // this time allowing equality) -- confirm against the upstream file.
1213
1214 return std::nullopt;
1215}
1216
1217/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1218static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1219 const SimplifyQuery &SQ) {
1220 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1221 if (!Known1)
1222 return false;
1223 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1224 if (!Known0)
1225 return false;
1226 return *Known0 == *Known1;
1227}
1228
1229// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1230//
1231// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1232// produce 0 or inf.
//
// The check scales the smallest value of the FP type by the largest exponent
// and the largest value by the smallest exponent, and requires the results to
// be infinity and zero respectively. FP types without an infinity are rejected.
1233static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1234 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
1235 if (!APFloat::semanticsHasInf(FltSem))
1236 return false;
1237
1238 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1239 // reasonable fp type (for example, `double` only has 11 exponent bits).
1240 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1241 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1242 int SignedMin = static_cast<int>(minIntN(ExpBits));
1243 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
 // NOTE(review): listing line 1244 is not visible; presumably the remaining
 // scalbn argument (a rounding mode) and the end of the statement -- confirm.
1245 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
 // NOTE(review): listing line 1246 is not visible; presumably the same trailing
 // argument as for ScaledUp -- confirm.
1247 return ScaledUp.isInfinity() && ScaledDown.isZero();
1248}
1249
1250/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1251/// can trigger other combines.
///
/// Requires the add to have a single use and to carry the no-wrap flag that
/// matches the signedness of the min/max. Returns the new (not yet inserted)
/// add, or nullptr when the pattern does not match.
// NOTE(review): listing line 1252 (the start of the declaration, including the
// II parameter) is not visible in this chunk -- confirm upstream.
1253 InstCombiner::BuilderTy &Builder) {
1254 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1255 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1256 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1257 "Expected a min or max intrinsic");
1258
1259 // TODO: Match vectors with undef elements, but undef may not propagate.
1260 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1261 Value *X;
1262 const APInt *C0, *C1;
1263 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1264 !match(Op1, m_APInt(C1)))
1265 return nullptr;
1266
1267 // Check for necessary no-wrap and overflow constraints.
1268 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1269 auto *Add = cast<BinaryOperator>(Op0);
1270 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1271 (!IsSigned && !Add->hasNoUnsignedWrap()))
1272 return nullptr;
1273
1274 // If the constant difference overflows, then instsimplify should reduce the
1275 // min/max to the add or C1.
1276 bool Overflow;
1277 APInt CDiff =
1278 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1279 assert(!Overflow && "Expected simplify of min/max");
1280
1281 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1282 // Note: the "mismatched" no-overflow setting does not propagate.
1283 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1284 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
 // The original constant operand of the add is reused rather than rebuilt.
1285 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1286 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1287}
1288/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1289Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1290 Type *Ty = MinMax1.getType();
1291
1292 // We are looking for a tree of:
1293 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1294 // Where the min and max could be reversed
1295 Instruction *MinMax2;
1296 BinaryOperator *AddSub;
1297 const APInt *MinValue, *MaxValue;
1298 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1299 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1300 return nullptr;
1301 } else if (match(&MinMax1,
1302 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1303 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1304 return nullptr;
1305 } else
1306 return nullptr;
1307
1308 // Check that the constants clamp a saturate, and that the new type would be
1309 // sensible to convert to.
1310 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1311 return nullptr;
1312 // In what bitwidth can this be treated as saturating arithmetics?
1313 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1314 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1315 // good first approximation for what should be done there.
1316 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1317 return nullptr;
1318
1319 // Also make sure that the inner min/max and the add/sub have one use.
1320 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1321 return nullptr;
1322
1323 // Create the new type (which can be a vector type)
1324 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1325
1326 Intrinsic::ID IntrinsicID;
1327 if (AddSub->getOpcode() == Instruction::Add)
1328 IntrinsicID = Intrinsic::sadd_sat;
1329 else if (AddSub->getOpcode() == Instruction::Sub)
1330 IntrinsicID = Intrinsic::ssub_sat;
1331 else
1332 return nullptr;
1333
1334 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1335 // is usually achieved via a sext from a smaller type.
1336 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1337 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1338 return nullptr;
1339
1340 // Finally create and return the sat intrinsic, truncated to the new type
1341 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1342 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1343 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1344 return CastInst::Create(Instruction::SExt, Sat, Ty);
1345}
1346
1347
1348/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1349/// can only be one of two possible constant values -- turn that into a select
1350/// of constants.
///
/// The inner min/max must have a single use and its constant must differ from
/// the outer constant by exactly one, in the direction that makes the clamp
/// range two values wide. Returns the new (not yet inserted) select, or nullptr.
// NOTE(review): listing line 1351 (the start of the declaration, including the
// II parameter) is not visible in this chunk -- confirm upstream.
1352 InstCombiner::BuilderTy &Builder) {
1353 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1354 Value *X;
1355 const APInt *C0, *C1;
1356 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1357 return nullptr;
1358
 // NOTE(review): listing line 1359 is not visible; presumably it declares Pred
 // initialized to the "bad predicate" sentinel tested after the switch --
 // confirm upstream.
1360 switch (II->getIntrinsicID()) {
1361 case Intrinsic::smax:
1362 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1363 Pred = ICmpInst::ICMP_SGT;
1364 break;
1365 case Intrinsic::smin:
1366 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1367 Pred = ICmpInst::ICMP_SLT;
1368 break;
1369 case Intrinsic::umax:
1370 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1371 Pred = ICmpInst::ICMP_UGT;
1372 break;
1373 case Intrinsic::umin:
1374 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1375 Pred = ICmpInst::ICMP_ULT;
1376 break;
1377 default:
1378 llvm_unreachable("Expected min/max intrinsic");
1379 }
 // No case matched: the pattern is not a two-value clamp.
1380 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1381 return nullptr;
1382
1383 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1384 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1385 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1386 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1387}
1388
1389/// If this min/max has a constant operand and an operand that is a matching
1390/// min/max with a constant operand, constant-fold the 2 constant operands.
///
/// Besides identical inner/outer intrinsics, the umax-of-smax and smin-of-umin
/// combinations are accepted when both constants are known non-negative.
/// Returns the new intrinsic call, or nullptr when the pattern does not match.
// NOTE(review): listing line 1391 (the start of the declaration, including the
// II parameter) is not visible in this chunk -- confirm upstream.
1392 IRBuilderBase &Builder,
1393 const SimplifyQuery &SQ) {
1394 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1395 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1396 if (!LHS)
1397 return nullptr;
1398
1399 Constant *C0, *C1;
1400 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1401 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1402 return nullptr;
1403
1404 // max (max X, C0), C1 --> max X, (max C0, C1)
1405 // min (min X, C0), C1 --> min X, (min C0, C1)
1406 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1407 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1408 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1409 if (InnerMinMaxID != MinMaxID &&
1410 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1411 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1412 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1413 return nullptr;
1414
 // NOTE(review): listing line 1415 is not visible; presumably it defines Pred,
 // the comparison predicate used to pick between C0 and C1 -- confirm upstream.
 // The combined constant is expressed as select(icmp Pred C0, C1), C0, C1.
1416 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1417 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
 // The new call uses the inner intrinsic's ID, not the outer one.
1418 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1419 {LHS->getArgOperand(0), NewC});
1420}
1421
1422/// If this min/max has a matching min/max operand with a constant, try to push
1423/// the constant operand into this instruction. This can enable more folds.
///
/// Returns the new (not yet inserted) outer call, or nullptr when the pattern
/// does not match.
1424static Instruction *
// NOTE(review): listing line 1425 (the function name and the II parameter) is
// not visible in this chunk -- confirm upstream.
1426 InstCombiner::BuilderTy &Builder) {
1427 // Match and capture a min/max operand candidate.
1428 Value *X, *Y;
1429 Constant *C;
1430 Instruction *Inner;
 // NOTE(review): listing lines 1431 and 1433 are not visible, so the match
 // expression below is only partially shown; presumably it binds Inner to a
 // one-use min/max of (X, constant C) and Y to the other operand -- confirm.
1432 m_Instruction(Inner),
1434 m_Value(Y))))
1435 return nullptr;
1436
1437 // The inner op must match. Check for constants to avoid infinite loops.
1438 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1439 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1440 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
 // NOTE(review): listing line 1441 is not visible; presumably the constant
 // checks announced by the comment above -- confirm upstream.
1442 return nullptr;
1443
1444 // max (max X, C), Y --> max (max X, Y), C
 // NOTE(review): listing line 1445 is not visible; presumably it starts the
 // definition of MinMax (the intrinsic declaration called below) -- confirm.
1446 MinMaxID, II->getType());
1447 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1448 NewInner->takeName(Inner);
1449 return CallInst::Create(MinMax, {NewInner, C});
1450}
1451
1452/// Reduce a sequence of min/max intrinsics with a common operand.
///
/// Both operands of \p II must be the same min/max intrinsic as \p II itself,
/// at least one of them with a single use, and they must share an operand.
/// The multi-use (or right-hand) inner call is kept and combined with the one
/// operand it does not already cover. Returns the new (not yet inserted) call,
/// or nullptr when the pattern does not match.
// NOTE(review): listing line 1453 (the declaration) is not visible in this
// chunk -- confirm the signature against the upstream file.
1454 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1455 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1456 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1457 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1458 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1459 RHS->getIntrinsicID() != MinMaxID ||
1460 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1461 return nullptr;
1462
1463 Value *A = LHS->getArgOperand(0);
1464 Value *B = LHS->getArgOperand(1);
1465 Value *C = RHS->getArgOperand(0);
1466 Value *D = RHS->getArgOperand(1);
1467
1468 // Look for a common operand.
1469 Value *MinMaxOp = nullptr;
1470 Value *ThirdOp = nullptr;
1471 if (LHS->hasOneUse()) {
1472 // If the LHS is only used in this chain and the RHS is used outside of it,
1473 // reuse the RHS min/max because that will eliminate the LHS.
1474 if (D == A || C == A) {
1475 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1476 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1477 MinMaxOp = RHS;
1478 ThirdOp = B;
1479 } else if (D == B || C == B) {
1480 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1481 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1482 MinMaxOp = RHS;
1483 ThirdOp = A;
1484 }
1485 } else {
1486 assert(RHS->hasOneUse() && "Expected one-use operand");
1487 // Reuse the LHS. This will eliminate the RHS.
1488 if (D == A || D == B) {
1489 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1490 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1491 MinMaxOp = LHS;
1492 ThirdOp = C;
1493 } else if (C == A || C == B) {
1494 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1495 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1496 MinMaxOp = LHS;
1497 ThirdOp = D;
1498 }
1499 }
1500
 // No shared operand was found between the two inner calls.
1501 if (!MinMaxOp || !ThirdOp)
1502 return nullptr;
1503
1504 Module *Mod = II->getModule();
1505 Function *MinMax =
1506 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1507 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1508}
1509
1510/// If all arguments of the intrinsic are unary shuffles with the same mask,
1511/// try to shuffle after the intrinsic.
///
/// Scalar-operand arguments are passed through unchanged and constant
/// arguments are "unshuffled" so the trailing shuffle reproduces them. Returns
/// the new (not yet inserted) shuffle, or nullptr when the fold does not apply.
// NOTE(review): listing lines 1512-1513 (the declaration) are not visible in
// this chunk -- confirm the signature against the upstream file.
1514 if (!II->getType()->isVectorTy() ||
1515 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1516 !II->getCalledFunction()->isSpeculatable())
1517 return nullptr;
1518
1519 Value *X;
1520 Constant *C;
1521 ArrayRef<int> Mask;
 // Use the first argument that is neither a constant nor a scalar operand to
 // establish the reference mask and source type.
1522 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1523 return isa<Constant>(Arg.get()) ||
1524 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1525 Arg.getOperandNo(), nullptr);
1526 });
1527 if (!NonConstArg ||
1528 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1529 return nullptr;
1530
1531 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1532 // instructions.
1533 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1534 return nullptr;
1535
1536 // See if all arguments are shuffled with the same mask.
 // NOTE(review): listing line 1537 is not visible; presumably it declares the
 // NewArgs container filled in the loop below -- confirm upstream.
1538 Type *SrcTy = X->getType();
1539 for (Use &Arg : II->args()) {
1540 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1541 Arg.getOperandNo(), nullptr))
1542 NewArgs.push_back(Arg);
1543 else if (match(&Arg,
1544 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1545 X->getType() == SrcTy)
1546 NewArgs.push_back(X);
1547 else if (match(&Arg, m_ImmConstant(C))) {
1548 // If it's a constant, try find the constant that would be shuffled to C.
1549 if (Constant *ShuffledC =
1550 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1551 NewArgs.push_back(ShuffledC);
1552 else
1553 return nullptr;
1554 } else
1555 return nullptr;
1556 }
1557
1558 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
 // FPI is forwarded to CreateIntrinsic only when II is a floating-point math
 // operator; presumably it serves as the fast-math-flag source -- confirm.
1559 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1560 // Result type might be a different vector width.
1561 // TODO: Check that the result type isn't widened?
1562 VectorType *ResTy =
1563 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1564 Value *NewIntrinsic =
1565 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1566 return new ShuffleVectorInst(NewIntrinsic, Mask);
1567}
1568
1569/// If all arguments of the intrinsic are reverses, try to pull the reverse
1570/// after the intrinsic.
///
/// Scalar-operand arguments and splats are passed through unchanged; other
/// immediate constants are reversed up front so that the final reverse restores
/// them. Returns nullptr when any argument fits none of these cases.
// NOTE(review): listing line 1571 (the declaration) is not visible in this
// chunk -- confirm the signature against the upstream file.
1572 if (!II->getType()->isVectorTy() ||
1573 !isTriviallyVectorizable(II->getIntrinsicID()))
1574 return nullptr;
1575
1576 // At least 1 operand must be a reverse with 1 use because we are creating 2
1577 // instructions.
1578 if (none_of(II->args(), [](Value *V) {
1579 return match(V, m_OneUse(m_VecReverse(m_Value())));
1580 }))
1581 return nullptr;
1582
1583 Value *X;
1584 Constant *C;
1585 SmallVector<Value *> NewArgs;
1586 for (Use &Arg : II->args()) {
1587 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1588 Arg.getOperandNo(), nullptr))
1589 NewArgs.push_back(Arg);
1590 else if (match(&Arg, m_VecReverse(m_Value(X))))
1591 NewArgs.push_back(X);
 // A splat is unchanged by reversal, so it can be used as-is.
1592 else if (isSplatValue(Arg))
1593 NewArgs.push_back(Arg);
1594 else if (match(&Arg, m_ImmConstant(C)))
1595 NewArgs.push_back(Builder.CreateVectorReverse(C));
1596 else
1597 return nullptr;
1598 }
1599
1600 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1601 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1602 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1603 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1604 return Builder.CreateVectorReverse(NewIntrinsic);
1605}
1606
1607/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1608/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1609/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
///
/// \tparam IntrID the reordering intrinsic to look through; restricted to
/// bswap and bitreverse by the static_assert below.
/// Returns the new (not yet inserted) logic op, or nullptr when nothing matched.
1610template <Intrinsic::ID IntrID>
// NOTE(review): listing line 1611 (the start of the declaration, including the
// leading parameter) is not visible in this chunk -- confirm upstream.
1612 InstCombiner::BuilderTy &Builder) {
1613 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1614 "This helper only supports BSWAP and BITREVERSE intrinsics");
1615
1616 Value *X, *Y;
1617 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1618 // don't match ConstantExpr that aren't meaningful for this transform.
 // NOTE(review): listing lines 1619-1620 are not visible; presumably the `if`
 // that performs the match described above, binds X and Y, and opens the scope
 // closed at listing line 1642 -- confirm upstream.
1621 Value *OldReorderX, *OldReorderY;
 // NOTE(review): listing line 1622 is not visible; presumably it defines Op,
 // the opcode of the matched logic operation -- confirm upstream.
1623
1624 // If both X and Y are bswap/bitreverse, the transform reduces the number
1625 // of instructions even if there's multiuse.
1626 // If only one operand is bswap/bitreverse, we need to ensure the operand
1627 // have only one use.
1628 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1629 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1630 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1631 }
1632
 // Only X is reordered: move the reordering onto Y instead.
1633 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1634 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1635 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1636 }
1637
 // Only Y is reordered: move the reordering onto X instead.
1638 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1639 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1640 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1641 }
1642 }
1643 return nullptr;
1644}
1645
1646/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1647/// `f(f(x, y), y) == f(x, y)` holds.
///
/// Returns true for the integer and floating-point min/max intrinsics listed
/// below and false for every other intrinsic ID.
// NOTE(review): listing line 1648 (the declaration, including the IID
// parameter) is not visible in this chunk -- confirm upstream.
1649 switch (IID) {
1650 case Intrinsic::smax:
1651 case Intrinsic::smin:
1652 case Intrinsic::umax:
1653 case Intrinsic::umin:
1654 case Intrinsic::maximum:
1655 case Intrinsic::minimum:
1656 case Intrinsic::maximumnum:
1657 case Intrinsic::minimumnum:
1658 case Intrinsic::maxnum:
1659 case Intrinsic::minnum:
1660 return true;
1661 default:
1662 return false;
1663 }
1664}
1665
1666/// Attempt to simplify value-accumulating recurrences of kind:
1667/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1668/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1669/// And let the idempotent binary intrinsic be hoisted, when the operands are
1670/// known to be loop-invariant.
///
/// Returns the replacement intrinsic call on (Init, OtherOp), or nullptr when
/// the intrinsic is not idempotent or the other conditions below fail.
// NOTE(review): listing line 1671 (the start of the declaration, including the
// IC parameter) is not visible in this chunk -- confirm upstream.
1672 IntrinsicInst *II) {
1673 PHINode *PN;
1674 Value *Init, *OtherOp;
1675
1676 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1677 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1678 auto IID = II->getIntrinsicID();
1679 if (!isIdempotentBinaryIntrinsic(IID) ||
 // NOTE(review): listing line 1680 is not visible; presumably the recurrence
 // match that binds PN, Init and OtherOp -- confirm upstream.
1681 !IC.getDominatorTree().dominates(OtherOp, PN))
1682 return nullptr;
1683
1684 auto *InvariantBinaryInst =
1685 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
 // Carry the fast-math flags of the original call over to the replacement.
1686 if (isa<FPMathOperator>(InvariantBinaryInst))
1687 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1688 return InvariantBinaryInst;
1689}
1690
1691static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1692 if (!CanReorderLanes)
1693 return nullptr;
1694
1695 Value *V;
1696 if (match(Arg, m_VecReverse(m_Value(V))))
1697 return V;
1698
1699 ArrayRef<int> Mask;
1700 if (!isa<FixedVectorType>(Arg->getType()) ||
1701 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1702 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1703 return nullptr;
1704
1705 int Sz = Mask.size();
1706 SmallBitVector UsedIndices(Sz);
1707 for (int Idx : Mask) {
1708 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1709 return nullptr;
1710 UsedIndices.set(Idx);
1711 }
1712
1713 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1714 // other changes.
1715 return UsedIndices.all() ? V : nullptr;
1716}
1717
1718/// Fold an unsigned minimum of trailing or leading zero bits counts:
1719/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1720/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1721/// >> ConstOp))
1722/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1723/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
///
/// \tparam IntrID the counting intrinsic, restricted to cttz and ctlz by the
/// static_assert below. Returns the new intrinsic call, or nullptr when neither
/// form matches.
1724template <Intrinsic::ID IntrID>
1725static Value *
// NOTE(review): listing line 1726 (the function name and the I0 / I1
// parameters) is not visible in this chunk -- confirm upstream.
1727 const DataLayout &DL,
1728 InstCombiner::BuilderTy &Builder) {
1729 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1730 "This helper only supports cttz and ctlz intrinsics");
1731
1732 Value *CtOp1, *CtOp2;
1733 Value *ZeroUndef1, *ZeroUndef2;
1734 if (!match(I0, m_OneUse(
1735 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1736 return nullptr;
1737
 // Both operands are counts of the same kind: count the OR of the inputs. The
 // second argument of the new call is the OR of the two original flags.
1738 if (match(I1,
1739 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1740 return Builder.CreateBinaryIntrinsic(
1741 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1742 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1743
1744 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1745 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1746 if (!match(I1, m_CheckedInt(LessBitWidth)))
1747 // We have a constant >= BitWidth (which can be handled by CVP)
1748 // or a non-splat vector with elements < and >= BitWidth
1749 return nullptr;
1750
1751 Type *Ty = I1->getType();
 // NOTE(review): listing line 1752 is not visible; presumably it starts the
 // definition of NewConst as a constant-folded shift of the value selected
 // below by I1 -- confirm upstream.
1753 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1754 IntrID == Intrinsic::cttz
1755 ? ConstantInt::get(Ty, 1)
1756 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1757 cast<Constant>(I1), DL);
 // The second argument of the new call is always the constant true here.
1758 return Builder.CreateBinaryIntrinsic(
1759 IntrID, Builder.CreateOr(CtOp1, NewConst),
1760 ConstantInt::getTrue(ZeroUndef1->getType()));
1761}
1762
1763/// Return whether "X LOp (Y ROp Z)" is always equal to
1764/// "(X LOp Y) ROp (X LOp Z)".
///
/// Accepted combinations: nuw add or nuw shl over umax/umin, and nsw add over
/// smax/smin. Everything else returns false.
// NOTE(review): listing line 1765 (the start of the declaration, including the
// LOp and HasNUW parameters) is not visible in this chunk -- confirm upstream.
1766 bool HasNSW, Intrinsic::ID ROp) {
1767 switch (ROp) {
1768 case Intrinsic::umax:
1769 case Intrinsic::umin:
1770 if (HasNUW && LOp == Instruction::Add)
1771 return true;
1772 if (HasNUW && LOp == Instruction::Shl)
1773 return true;
1774 return false;
1775 case Intrinsic::smax:
1776 case Intrinsic::smin:
1777 return HasNSW && LOp == Instruction::Add;
1778 default:
1779 return false;
1780 }
1781}
1782
1783/// Return whether "(X ROp Y) LOp Z" is always equal to
1784/// "(X LOp Z) ROp (Y LOp Z)".
1786 bool HasNSW, Intrinsic::ID ROp) {
1787 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1788 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1789 switch (ROp) {
1790 case Intrinsic::umax:
1791 case Intrinsic::umin:
1792 return HasNUW && LOp == Instruction::Sub;
1793 case Intrinsic::smax:
1794 case Intrinsic::smin:
1795 return HasNSW && LOp == Instruction::Sub;
1796 default:
1797 return false;
1798 }
1799}
1800
1801// Attempts to factorise a common term
1802// in an instruction that has the form "(A op' B) op (C op' D)
1803// where op is an intrinsic and op' is a binop
1804static Value *
1806 InstCombiner::BuilderTy &Builder) {
1807 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1808 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1809
1812
1813 if (!Op0 || !Op1)
1814 return nullptr;
1815
1816 if (Op0->getOpcode() != Op1->getOpcode())
1817 return nullptr;
1818
1819 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1820 return nullptr;
1821
1822 Instruction::BinaryOps InnerOpcode =
1823 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1824 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1825 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1826
1827 Value *A = Op0->getOperand(0);
1828 Value *B = Op0->getOperand(1);
1829 Value *C = Op1->getOperand(0);
1830 Value *D = Op1->getOperand(1);
1831
1832 // Attempts to swap variables such that A equals C or B equals D,
1833 // if the inner operation is commutative.
1834 if (Op0->isCommutative() && A != C && B != D) {
1835 if (A == D || B == C)
1836 std::swap(C, D);
1837 else
1838 return nullptr;
1839 }
1840
1841 BinaryOperator *NewBinop;
1842 if (A == C &&
1843 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1844 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1845 NewBinop =
1846 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1847 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1848 TopLevelOpcode)) {
1849 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1850 NewBinop =
1851 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1852 } else {
1853 return nullptr;
1854 }
1855
1856 NewBinop->setHasNoUnsignedWrap(HasNUW);
1857 NewBinop->setHasNoSignedWrap(HasNSW);
1858
1859 return NewBinop;
1860}
1861
1863 Value *Arg0 = II->getArgOperand(0);
1864 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1865 if (!ShiftConst)
1866 return nullptr;
1867
1868 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1869 bool AllPositive = true;
1870 bool AllNegative = true;
1871
1872 auto Check = [&](Constant *C) -> bool {
1873 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1874 const APInt &V = CI->getValue();
1875 if (V.isNonNegative()) {
1876 AllNegative = false;
1877 return AllPositive && V.ult(ElemBits);
1878 }
1879 AllPositive = false;
1880 return AllNegative && V.sgt(-ElemBits);
1881 }
1882 return false;
1883 };
1884
1885 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1886 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1887 if (!Check(ShiftConst->getAggregateElement(I)))
1888 return nullptr;
1889 }
1890
1891 } else if (!Check(ShiftConst))
1892 return nullptr;
1893
1894 IRBuilderBase &B = IC.Builder;
1895 if (AllPositive)
1896 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1897
1898 Value *NegAmt = B.CreateNeg(ShiftConst);
1899 Intrinsic::ID IID = II->getIntrinsicID();
1900 const bool IsSigned =
1901 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1902 Value *Result =
1903 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1904 return IC.replaceInstUsesWith(*II, Result);
1905}
1906
1907/// CallInst simplification. This mostly only handles folding of intrinsic
1908/// instructions. For normal calls, it allows visitCallBase to do the heavy
1909/// lifting.
1911 // Don't try to simplify calls without uses. It will not do anything useful,
1912 // but will result in the following folds being skipped.
1913 if (!CI.use_empty()) {
1914 SmallVector<Value *, 8> Args(CI.args());
1915 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1916 SQ.getWithInstruction(&CI)))
1917 return replaceInstUsesWith(CI, V);
1918 }
1919
1920 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1921 return visitFree(CI, FreedOp);
1922
1923 // If the caller function (i.e. us, the function that contains this CallInst)
1924 // is nounwind, mark the call as nounwind, even if the callee isn't.
1925 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1926 CI.setDoesNotThrow();
1927 return &CI;
1928 }
1929
1931 if (!II)
1932 return visitCallBase(CI);
1933
1934 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1935 // instead of in visitCallBase.
1936 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1937 if (auto NumBytes = MI->getLengthInBytes()) {
1938 // memmove/cpy/set of zero bytes is a noop.
1939 if (NumBytes->isZero())
1940 return eraseInstFromFunction(CI);
1941
1942 // For atomic unordered mem intrinsics if len is not a positive or
1943 // not a multiple of element size then behavior is undefined.
1944 if (MI->isAtomic() &&
1945 (NumBytes->isNegative() ||
1946 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1948 assert(MI->getType()->isVoidTy() &&
1949 "non void atomic unordered mem intrinsic");
1950 return eraseInstFromFunction(*MI);
1951 }
1952 }
1953
1954 // No other transformations apply to volatile transfers.
1955 if (MI->isVolatile())
1956 return nullptr;
1957
1959 // memmove(x,x,size) -> noop.
1960 if (MTI->getSource() == MTI->getDest())
1961 return eraseInstFromFunction(CI);
1962 }
1963
1964 auto IsPointerUndefined = [MI](Value *Ptr) {
1965 return isa<ConstantPointerNull>(Ptr) &&
1967 MI->getFunction(),
1968 cast<PointerType>(Ptr->getType())->getAddressSpace());
1969 };
1970 bool SrcIsUndefined = false;
1971 // If we can determine a pointer alignment that is bigger than currently
1972 // set, update the alignment.
1973 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1975 return I;
1976 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1977 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1978 if (Instruction *I = SimplifyAnyMemSet(MSI))
1979 return I;
1980 }
1981
1982 // If src/dest is null, this memory intrinsic must be a noop.
1983 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1984 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1985 return eraseInstFromFunction(CI);
1986 }
1987
1988 // If we have a memmove and the source operation is a constant global,
1989 // then the source and dest pointers can't alias, so we can change this
1990 // into a call to memcpy.
1991 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1992 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1993 if (GVSrc->isConstant()) {
1994 Module *M = CI.getModule();
1995 Intrinsic::ID MemCpyID =
1996 MMI->isAtomic()
1997 ? Intrinsic::memcpy_element_unordered_atomic
1998 : Intrinsic::memcpy;
1999 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2000 CI.getArgOperand(1)->getType(),
2001 CI.getArgOperand(2)->getType() };
2003 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2004 return II;
2005 }
2006 }
2007 }
2008
2009 // For fixed width vector result intrinsics, use the generic demanded vector
2010 // support.
2011 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2012 auto VWidth = IIFVTy->getNumElements();
2013 APInt PoisonElts(VWidth, 0);
2014 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2015 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2016 if (V != II)
2017 return replaceInstUsesWith(*II, V);
2018 return II;
2019 }
2020 }
2021
2022 if (II->isCommutative()) {
2023 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2024 replaceOperand(*II, 0, Pair->first);
2025 replaceOperand(*II, 1, Pair->second);
2026 return II;
2027 }
2028
2029 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2030 return NewCall;
2031 }
2032
2033 // Unused constrained FP intrinsic calls may have declared side effect, which
2034 // prevents it from being removed. In some cases however the side effect is
2035 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2036 // returns a replacement, the call may be removed.
2037 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2038 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2039 return eraseInstFromFunction(CI);
2040 }
2041
2042 Intrinsic::ID IID = II->getIntrinsicID();
2043 switch (IID) {
2044 case Intrinsic::objectsize: {
2045 SmallVector<Instruction *> InsertedInstructions;
2046 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2047 &InsertedInstructions)) {
2048 for (Instruction *Inserted : InsertedInstructions)
2049 Worklist.add(Inserted);
2050 return replaceInstUsesWith(CI, V);
2051 }
2052 return nullptr;
2053 }
2054 case Intrinsic::abs: {
2055 Value *IIOperand = II->getArgOperand(0);
2056 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2057
2058 // abs(-x) -> abs(x)
2059 Value *X;
2060 if (match(IIOperand, m_Neg(m_Value(X)))) {
2061 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2062 replaceOperand(*II, 1, Builder.getTrue());
2063 return replaceOperand(*II, 0, X);
2064 }
2065 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2066 return replaceOperand(*II, 0, X);
2067
2068 Value *Y;
2069 // abs(a * abs(b)) -> abs(a * b)
2070 if (match(IIOperand,
2073 bool NSW =
2074 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2075 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2076 return replaceOperand(*II, 0, XY);
2077 }
2078
2079 if (std::optional<bool> Known =
2080 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2081 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2082 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2083 if (!*Known)
2084 return replaceInstUsesWith(*II, IIOperand);
2085
2086 // abs(x) -> -x if x < 0
2087 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2088 if (IntMinIsPoison)
2089 return BinaryOperator::CreateNSWNeg(IIOperand);
2090 return BinaryOperator::CreateNeg(IIOperand);
2091 }
2092
2093 // abs (sext X) --> zext (abs X*)
2094 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2095 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2096 Value *NarrowAbs =
2097 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2098 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2099 }
2100
2101 // Match a complicated way to check if a number is odd/even:
2102 // abs (srem X, 2) --> and X, 1
2103 const APInt *C;
2104 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2105 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2106
2107 break;
2108 }
2109 case Intrinsic::umin: {
2110 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2111 // umin(x, 1) == zext(x != 0)
2112 if (match(I1, m_One())) {
2113 assert(II->getType()->getScalarSizeInBits() != 1 &&
2114 "Expected simplify of umin with max constant");
2115 Value *Zero = Constant::getNullValue(I0->getType());
2116 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2117 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2118 }
2119 // umin(cttz(x), const) --> cttz(x | (1 << const))
2120 if (Value *FoldedCttz =
2122 I0, I1, DL, Builder))
2123 return replaceInstUsesWith(*II, FoldedCttz);
2124 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2125 if (Value *FoldedCtlz =
2127 I0, I1, DL, Builder))
2128 return replaceInstUsesWith(*II, FoldedCtlz);
2129 [[fallthrough]];
2130 }
2131 case Intrinsic::umax: {
2132 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2133 Value *X, *Y;
2134 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2135 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2136 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2137 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2138 }
2139 Constant *C;
2140 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2141 I0->hasOneUse()) {
2142 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2143 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2144 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2145 }
2146 }
2147 // If C is not 0:
2148 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2149 // If C is not 0 or 1:
2150 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2151 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2152 const APInt *C;
2153 Value *X;
2154 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2155 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2156 return nullptr;
2157 if (C->isZero())
2158 return nullptr;
2159 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2160 return nullptr;
2161
2162 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2163 Value *NewSelect = nullptr;
2164 NewSelect = Builder.CreateSelectWithUnknownProfile(
2165 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2166 return replaceInstUsesWith(*II, NewSelect);
2167 };
2168
2169 if (IID == Intrinsic::umax) {
2170 if (Instruction *I = foldMaxMulShift(I0, I1))
2171 return I;
2172 if (Instruction *I = foldMaxMulShift(I1, I0))
2173 return I;
2174 }
2175
2176 // If both operands of unsigned min/max are sign-extended, it is still ok
2177 // to narrow the operation.
2178 [[fallthrough]];
2179 }
2180 case Intrinsic::smax:
2181 case Intrinsic::smin: {
2182 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2183 Value *X, *Y;
2184 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2185 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2186 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2187 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2188 }
2189
2190 Constant *C;
2191 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2192 I0->hasOneUse()) {
2193 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2194 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2195 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2196 }
2197 }
2198
2199 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2200 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2201 const APInt *MinC, *MaxC;
2202 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2203 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2204 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2205 Value *NewMax = Builder.CreateBinaryIntrinsic(
2206 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2207 return replaceInstUsesWith(
2208 *II, Builder.CreateBinaryIntrinsic(
2209 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2210 };
2211 if (IID == Intrinsic::smax &&
2213 m_APInt(MinC)))) &&
2214 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2215 return CreateCanonicalClampForm(true);
2216 if (IID == Intrinsic::umax &&
2218 m_APInt(MinC)))) &&
2219 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2220 return CreateCanonicalClampForm(false);
2221
2222 // umin(i1 X, i1 Y) -> and i1 X, Y
2223 // smax(i1 X, i1 Y) -> and i1 X, Y
2224 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2225 II->getType()->isIntOrIntVectorTy(1)) {
2226 return BinaryOperator::CreateAnd(I0, I1);
2227 }
2228
2229 // umax(i1 X, i1 Y) -> or i1 X, Y
2230 // smin(i1 X, i1 Y) -> or i1 X, Y
2231 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2232 II->getType()->isIntOrIntVectorTy(1)) {
2233 return BinaryOperator::CreateOr(I0, I1);
2234 }
2235
2236 // smin(smax(X, -1), 1) -> scmp(X, 0)
2237 // smax(smin(X, 1), -1) -> scmp(X, 0)
2238 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2239 // And i1's have been changed to and/ors
2240 // So we only need to check for smin
2241 if (IID == Intrinsic::smin) {
2242 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2243 match(I1, m_One())) {
2244 Value *Zero = ConstantInt::get(X->getType(), 0);
2245 return replaceInstUsesWith(
2246 CI,
2247 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2248 }
2249 }
2250
2251 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2252 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2253 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2254 // TODO: Canonicalize neg after min/max if I1 is constant.
2255 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2256 (I0->hasOneUse() || I1->hasOneUse())) {
2258 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2259 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2260 }
2261 }
2262
2263 // (umax X, (xor X, Pow2))
2264 // -> (or X, Pow2)
2265 // (umin X, (xor X, Pow2))
2266 // -> (and X, ~Pow2)
2267 // (smax X, (xor X, Pos_Pow2))
2268 // -> (or X, Pos_Pow2)
2269 // (smin X, (xor X, Pos_Pow2))
2270 // -> (and X, ~Pos_Pow2)
2271 // (smax X, (xor X, Neg_Pow2))
2272 // -> (and X, ~Neg_Pow2)
2273 // (smin X, (xor X, Neg_Pow2))
2274 // -> (or X, Neg_Pow2)
2275 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2276 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2277 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2278 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2279 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2280
2281 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2282 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2283 if (KnownSign == std::nullopt) {
2284 UseOr = false;
2285 UseAndN = false;
2286 } else if (*KnownSign /* true is Signed. */) {
2287 UseOr ^= true;
2288 UseAndN ^= true;
2289 Type *Ty = I0->getType();
2290 // Negative power of 2 must be IntMin. It's possible to be able to
2291 // prove negative / power of 2 without actually having known bits, so
2292 // just get the value by hand.
2294 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2295 }
2296 }
2297 if (UseOr)
2298 return BinaryOperator::CreateOr(I0, X);
2299 else if (UseAndN)
2300 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2301 }
2302
2303 // If we can eliminate ~A and Y is free to invert:
2304 // max ~A, Y --> ~(min A, ~Y)
2305 //
2306 // Examples:
2307 // max ~A, ~Y --> ~(min A, Y)
2308 // max ~A, C --> ~(min A, ~C)
2309 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2310 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2311 Value *A;
2312 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2313 !isFreeToInvert(A, A->hasOneUse())) {
2314 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2316 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2317 return BinaryOperator::CreateNot(InvMaxMin);
2318 }
2319 }
2320 return nullptr;
2321 };
2322
2323 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2324 return I;
2325 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2326 return I;
2327
2329 return I;
2330
2331 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2332 const APInt *RHSC;
2333 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2334 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2335 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2336 ConstantInt::get(II->getType(), *RHSC));
2337
2338 // smax(X, -X) --> abs(X)
2339 // smin(X, -X) --> -abs(X)
2340 // umax(X, -X) --> -abs(X)
2341 // umin(X, -X) --> abs(X)
2342 if (isKnownNegation(I0, I1)) {
2343 // We can choose either operand as the input to abs(), but if we can
2344 // eliminate the only use of a value, that's better for subsequent
2345 // transforms/analysis.
2346 if (I0->hasOneUse() && !I1->hasOneUse())
2347 std::swap(I0, I1);
2348
2349 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2350 // operation and potentially its negation.
2351 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2352 Value *Abs = Builder.CreateBinaryIntrinsic(
2353 Intrinsic::abs, I0,
2354 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2355
2356 // We don't have a "nabs" intrinsic, so negate if needed based on the
2357 // max/min operation.
2358 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2359 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2360 return replaceInstUsesWith(CI, Abs);
2361 }
2362
2364 return Sel;
2365
2366 if (Instruction *SAdd = matchSAddSubSat(*II))
2367 return SAdd;
2368
2369 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2370 return replaceInstUsesWith(*II, NewMinMax);
2371
2373 return R;
2374
2375 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2376 return NewMinMax;
2377
2378 // Try to fold minmax with constant RHS based on range information
2379 if (match(I1, m_APIntAllowPoison(RHSC))) {
2380 ICmpInst::Predicate Pred =
2382 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2384 I0, IsSigned, SQ.getWithInstruction(II));
2385 if (!LHS_CR.isFullSet()) {
2386 if (LHS_CR.icmp(Pred, *RHSC))
2387 return replaceInstUsesWith(*II, I0);
2388 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2389 return replaceInstUsesWith(*II,
2390 ConstantInt::get(II->getType(), *RHSC));
2391 }
2392 }
2393
2395 return replaceInstUsesWith(*II, V);
2396
2397 break;
2398 }
2399 case Intrinsic::scmp: {
2400 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2401 Value *LHS, *RHS;
2402 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2403 return replaceInstUsesWith(
2404 CI,
2405 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2406 break;
2407 }
2408 case Intrinsic::bitreverse: {
2409 Value *IIOperand = II->getArgOperand(0);
2410 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2411 Value *X;
2412 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2413 X->getType()->isIntOrIntVectorTy(1)) {
2414 Type *Ty = II->getType();
2415 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2416 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2418 }
2419
2420 if (Instruction *crossLogicOpFold =
2422 return crossLogicOpFold;
2423
2424 break;
2425 }
2426 case Intrinsic::bswap: {
2427 Value *IIOperand = II->getArgOperand(0);
2428
2429 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2430 // inverse-shift-of-bswap:
2431 // bswap (shl X, Y) --> lshr (bswap X), Y
2432 // bswap (lshr X, Y) --> shl (bswap X), Y
2433 Value *X, *Y;
2434 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2435 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2437 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2438 BinaryOperator::BinaryOps InverseShift =
2439 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2440 ? Instruction::LShr
2441 : Instruction::Shl;
2442 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2443 }
2444 }
2445
2446 KnownBits Known = computeKnownBits(IIOperand, II);
2447 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2448 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2449 unsigned BW = Known.getBitWidth();
2450
2451 // bswap(x) -> shift(x) if x has exactly one "active byte"
2452 if (BW - LZ - TZ == 8) {
2453 assert(LZ != TZ && "active byte cannot be in the middle");
2454 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2455 return BinaryOperator::CreateNUWShl(
2456 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2457 // -> lshr(x) if the "active byte" is in the high part of x
2458 return BinaryOperator::CreateExactLShr(
2459 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2460 }
2461
2462 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2463 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2464 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2465 Value *CV = ConstantInt::get(X->getType(), C);
2466 Value *V = Builder.CreateLShr(X, CV);
2467 return new TruncInst(V, IIOperand->getType());
2468 }
2469
2470 if (Instruction *crossLogicOpFold =
2472 return crossLogicOpFold;
2473 }
2474
2475 // Try to fold into bitreverse if bswap is the root of the expression tree.
2476 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2477 /*MatchBitReversals*/ true))
2478 return BitOp;
2479 break;
2480 }
2481 case Intrinsic::masked_load:
2482 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2483 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2484 break;
2485 case Intrinsic::masked_store:
2486 return simplifyMaskedStore(*II);
2487 case Intrinsic::masked_gather:
2488 return simplifyMaskedGather(*II);
2489 case Intrinsic::masked_scatter:
2490 return simplifyMaskedScatter(*II);
2491 case Intrinsic::launder_invariant_group:
2492 case Intrinsic::strip_invariant_group:
2493 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2494 return replaceInstUsesWith(*II, SkippedBarrier);
2495 break;
2496 case Intrinsic::powi:
2497 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2498 // 0 and 1 are handled in instsimplify
2499 // powi(x, -1) -> 1/x
2500 if (Power->isMinusOne())
2501 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2502 II->getArgOperand(0), II);
2503 // powi(x, 2) -> x*x
2504 if (Power->equalsInt(2))
2505 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2506 II->getArgOperand(0), II);
2507
2508 if (!Power->getValue()[0]) {
2509 Value *X;
2510 // If power is even:
2511 // powi(-x, p) -> powi(x, p)
2512 // powi(fabs(x), p) -> powi(x, p)
2513 // powi(copysign(x, y), p) -> powi(x, p)
2514 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2515 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2516 match(II->getArgOperand(0),
2518 return replaceOperand(*II, 0, X);
2519 }
2520 }
2521 break;
2522
2523 case Intrinsic::cttz:
2524 case Intrinsic::ctlz:
2525 if (auto *I = foldCttzCtlz(*II, *this))
2526 return I;
2527 break;
2528
2529 case Intrinsic::ctpop:
2530 if (auto *I = foldCtpop(*II, *this))
2531 return I;
2532 break;
2533
2534 case Intrinsic::fshl:
2535 case Intrinsic::fshr: {
2536 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2537 Type *Ty = II->getType();
2538 unsigned BitWidth = Ty->getScalarSizeInBits();
2539 Constant *ShAmtC;
2540 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2541 // Canonicalize a shift amount constant operand to modulo the bit-width.
2542 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2543 Constant *ModuloC =
2544 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2545 if (!ModuloC)
2546 return nullptr;
2547 if (ModuloC != ShAmtC)
2548 return replaceOperand(*II, 2, ModuloC);
2549
2551 ShAmtC, DL),
2552 m_One()) &&
2553 "Shift amount expected to be modulo bitwidth");
2554
2555 // Canonicalize funnel shift right by constant to funnel shift left. This
2556 // is not entirely arbitrary. For historical reasons, the backend may
2557 // recognize rotate left patterns but miss rotate right patterns.
2558 if (IID == Intrinsic::fshr) {
2559 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2560 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2561 return nullptr;
2562
2563 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2564 Module *Mod = II->getModule();
2565 Function *Fshl =
2566 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2567 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2568 }
2569 assert(IID == Intrinsic::fshl &&
2570 "All funnel shifts by simple constants should go left");
2571
2572 // fshl(X, 0, C) --> shl X, C
2573 // fshl(X, undef, C) --> shl X, C
2574 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2575 return BinaryOperator::CreateShl(Op0, ShAmtC);
2576
2577 // fshl(0, X, C) --> lshr X, (BW-C)
2578 // fshl(undef, X, C) --> lshr X, (BW-C)
2579 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2580 return BinaryOperator::CreateLShr(Op1,
2581 ConstantExpr::getSub(WidthC, ShAmtC));
2582
2583 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2584 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2585 Module *Mod = II->getModule();
2586 Function *Bswap =
2587 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2588 return CallInst::Create(Bswap, { Op0 });
2589 }
2590 if (Instruction *BitOp =
2591 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2592 /*MatchBitReversals*/ true))
2593 return BitOp;
2594
2595 // R = fshl(X, X, C2)
2596 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2597 Value *InnerOp;
2598 const APInt *ShAmtInnerC, *ShAmtOuterC;
2599 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2600 m_APInt(ShAmtInnerC))) &&
2601 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2602 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2603 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2604 if (Modulo.isZero())
2605 return replaceInstUsesWith(*II, InnerOp);
2606 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2608 {InnerOp, InnerOp, ModuloC});
2609 }
2610 }
2611
2612 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2613 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2614 // if BitWidth is a power-of-2
2615 Value *Y;
2616 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2617 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2618 Module *Mod = II->getModule();
2620 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2621 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2622 }
2623
2624 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2625 // power-of-2
2626 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2627 match(Op1, m_ZeroInt())) {
2628 Value *Op2 = II->getArgOperand(2);
2629 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2630 return BinaryOperator::CreateShl(Op0, And);
2631 }
2632
2633 // Left or right might be masked.
2635 return &CI;
2636
2637 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2638 // so only the low bits of the shift amount are demanded if the bitwidth is
2639 // a power-of-2.
2640 if (!isPowerOf2_32(BitWidth))
2641 break;
2643 KnownBits Op2Known(BitWidth);
2644 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2645 return &CI;
2646 break;
2647 }
2648 case Intrinsic::ptrmask: {
2649 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2650 KnownBits Known(BitWidth);
2652 return II;
2653
2654 Value *InnerPtr, *InnerMask;
2655 bool Changed = false;
2656 // Combine:
2657 // (ptrmask (ptrmask p, A), B)
2658 // -> (ptrmask p, (and A, B))
2659 if (match(II->getArgOperand(0),
2661 m_Value(InnerMask))))) {
2662 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2663 "Mask types must match");
2664 // TODO: If InnerMask == Op1, we could copy attributes from inner
2665 // callsite -> outer callsite.
2666 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2667 replaceOperand(CI, 0, InnerPtr);
2668 replaceOperand(CI, 1, NewMask);
2669 Changed = true;
2670 }
2671
2672 // See if we can deduce non-null.
2673 if (!CI.hasRetAttr(Attribute::NonNull) &&
2674 (Known.isNonZero() ||
2675 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2676 CI.addRetAttr(Attribute::NonNull);
2677 Changed = true;
2678 }
2679
2680 unsigned NewAlignmentLog =
2682 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2683 // Known bits will capture if we had alignment information associated with
2684 // the pointer argument.
2685 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2687 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2688 Changed = true;
2689 }
2690 if (Changed)
2691 return &CI;
2692 break;
2693 }
2694 case Intrinsic::uadd_with_overflow:
2695 case Intrinsic::sadd_with_overflow: {
2696 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2697 return I;
2698
2699 // Given 2 constant operands whose sum does not overflow:
2700 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2701 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2702 Value *X;
2703 const APInt *C0, *C1;
2704 Value *Arg0 = II->getArgOperand(0);
2705 Value *Arg1 = II->getArgOperand(1);
2706 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2707 bool HasNWAdd = IsSigned
2708 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2709 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2710 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2711 bool Overflow;
2712 APInt NewC =
2713 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2714 if (!Overflow)
2715 return replaceInstUsesWith(
2716 *II, Builder.CreateBinaryIntrinsic(
2717 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2718 }
2719 break;
2720 }
2721
2722 case Intrinsic::umul_with_overflow:
2723 case Intrinsic::smul_with_overflow:
2724 case Intrinsic::usub_with_overflow:
2725 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2726 return I;
2727 break;
2728
2729 case Intrinsic::ssub_with_overflow: {
2730 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2731 return I;
2732
2733 Constant *C;
2734 Value *Arg0 = II->getArgOperand(0);
2735 Value *Arg1 = II->getArgOperand(1);
2736 // Given a constant C that is not the minimum signed value
2737 // for an integer of a given bit width:
2738 //
2739 // ssubo X, C -> saddo X, -C
2740 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2741 Value *NegVal = ConstantExpr::getNeg(C);
2742 // Build a saddo call that is equivalent to the discovered
2743 // ssubo call.
2744 return replaceInstUsesWith(
2745 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2746 Arg0, NegVal));
2747 }
2748
2749 break;
2750 }
2751
2752 case Intrinsic::uadd_sat:
2753 case Intrinsic::sadd_sat:
2754 case Intrinsic::usub_sat:
2755 case Intrinsic::ssub_sat: {
2757 Type *Ty = SI->getType();
2758 Value *Arg0 = SI->getLHS();
2759 Value *Arg1 = SI->getRHS();
2760
2761 // Make use of known overflow information.
2762 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2763 Arg0, Arg1, SI);
2764 switch (OR) {
2766 break;
2768 if (SI->isSigned())
2769 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2770 else
2771 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2773 unsigned BitWidth = Ty->getScalarSizeInBits();
2774 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2775 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2776 }
2778 unsigned BitWidth = Ty->getScalarSizeInBits();
2779 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2780 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2781 }
2782 }
2783
2784 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2785 // which after that:
2786 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2787 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2788 Constant *C, *C1;
2789 Value *A;
2790 if (IID == Intrinsic::usub_sat &&
2791 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2792 match(Arg1, m_ImmConstant(C1))) {
2793 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2794 auto *NewSub =
2795 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2796 return replaceInstUsesWith(*SI, NewSub);
2797 }
2798
2799 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2800 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2801 C->isNotMinSignedValue()) {
2802 Value *NegVal = ConstantExpr::getNeg(C);
2803 return replaceInstUsesWith(
2804 *II, Builder.CreateBinaryIntrinsic(
2805 Intrinsic::sadd_sat, Arg0, NegVal));
2806 }
2807
2808 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2809 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2810 // if Val and Val2 have the same sign
2811 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2812 Value *X;
2813 const APInt *Val, *Val2;
2814 APInt NewVal;
2815 bool IsUnsigned =
2816 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2817 if (Other->getIntrinsicID() == IID &&
2818 match(Arg1, m_APInt(Val)) &&
2819 match(Other->getArgOperand(0), m_Value(X)) &&
2820 match(Other->getArgOperand(1), m_APInt(Val2))) {
2821 if (IsUnsigned)
2822 NewVal = Val->uadd_sat(*Val2);
2823 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2824 bool Overflow;
2825 NewVal = Val->sadd_ov(*Val2, Overflow);
2826 if (Overflow) {
2827 // Both adds together may add more than SignedMaxValue
2828 // without saturating the final result.
2829 break;
2830 }
2831 } else {
2832 // Cannot fold saturated addition with different signs.
2833 break;
2834 }
2835
2836 return replaceInstUsesWith(
2837 *II, Builder.CreateBinaryIntrinsic(
2838 IID, X, ConstantInt::get(II->getType(), NewVal)));
2839 }
2840 }
2841 break;
2842 }
2843
2844 case Intrinsic::minnum:
2845 case Intrinsic::maxnum:
2846 case Intrinsic::minimumnum:
2847 case Intrinsic::maximumnum:
2848 case Intrinsic::minimum:
2849 case Intrinsic::maximum: {
2850 Value *Arg0 = II->getArgOperand(0);
2851 Value *Arg1 = II->getArgOperand(1);
2852 Value *X, *Y;
2853 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2854 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2855 // If both operands are negated, invert the call and negate the result:
2856 // min(-X, -Y) --> -(max(X, Y))
2857 // max(-X, -Y) --> -(min(X, Y))
2858 Intrinsic::ID NewIID;
2859 switch (IID) {
2860 case Intrinsic::maxnum:
2861 NewIID = Intrinsic::minnum;
2862 break;
2863 case Intrinsic::minnum:
2864 NewIID = Intrinsic::maxnum;
2865 break;
2866 case Intrinsic::maximumnum:
2867 NewIID = Intrinsic::minimumnum;
2868 break;
2869 case Intrinsic::minimumnum:
2870 NewIID = Intrinsic::maximumnum;
2871 break;
2872 case Intrinsic::maximum:
2873 NewIID = Intrinsic::minimum;
2874 break;
2875 case Intrinsic::minimum:
2876 NewIID = Intrinsic::maximum;
2877 break;
2878 default:
2879 llvm_unreachable("unexpected intrinsic ID");
2880 }
2881 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2882 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2883 FNeg->copyIRFlags(II);
2884 return FNeg;
2885 }
2886
2887 // m(m(X, C2), C1) -> m(X, C)
2888 const APFloat *C1, *C2;
2889 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2890 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2891 ((match(M->getArgOperand(0), m_Value(X)) &&
2892 match(M->getArgOperand(1), m_APFloat(C2))) ||
2893 (match(M->getArgOperand(1), m_Value(X)) &&
2894 match(M->getArgOperand(0), m_APFloat(C2))))) {
2895 APFloat Res(0.0);
2896 switch (IID) {
2897 case Intrinsic::maxnum:
2898 Res = maxnum(*C1, *C2);
2899 break;
2900 case Intrinsic::minnum:
2901 Res = minnum(*C1, *C2);
2902 break;
2903 case Intrinsic::maximumnum:
2904 Res = maximumnum(*C1, *C2);
2905 break;
2906 case Intrinsic::minimumnum:
2907 Res = minimumnum(*C1, *C2);
2908 break;
2909 case Intrinsic::maximum:
2910 Res = maximum(*C1, *C2);
2911 break;
2912 case Intrinsic::minimum:
2913 Res = minimum(*C1, *C2);
2914 break;
2915 default:
2916 llvm_unreachable("unexpected intrinsic ID");
2917 }
2918 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2919 // was a simplification (so Arg0 and its original flags could
2920 // propagate?)
2921 Value *V = Builder.CreateBinaryIntrinsic(
2922 IID, X, ConstantFP::get(Arg0->getType(), Res),
2924 return replaceInstUsesWith(*II, V);
2925 }
2926 }
2927
2928 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2929 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2930 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2931 X->getType() == Y->getType()) {
2932 Value *NewCall =
2933 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2934 return new FPExtInst(NewCall, II->getType());
2935 }
2936
2937 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2938 Constant *C;
2939 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2940 match(Arg1, m_ImmConstant(C))) {
2941 if (Constant *TruncC =
2942 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2943 Value *NewCall =
2944 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2945 return new FPExtInst(NewCall, II->getType());
2946 }
2947 }
2948
2949 // max X, -X --> fabs X
2950 // min X, -X --> -(fabs X)
2951 // TODO: Remove one-use limitation? That is obviously better for max,
2952 // hence why we don't check for one-use for that. However,
2953 // it would be an extra instruction for min (fnabs), but
2954 // that is still likely better for analysis and codegen.
2955 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2956 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2957 return Op0->hasOneUse() ||
2958 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2959 IID != Intrinsic::minimumnum);
2960 return false;
2961 };
2962
2963 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2964 Value *R = Builder.CreateFAbs(X, II);
2965 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2966 IID == Intrinsic::minimumnum)
2967 R = Builder.CreateFNegFMF(R, II);
2968 return replaceInstUsesWith(*II, R);
2969 }
2970
2971 break;
2972 }
2973 case Intrinsic::matrix_multiply: {
2974 // Optimize negation in matrix multiplication.
2975
2976 // -A * -B -> A * B
2977 Value *A, *B;
2978 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2979 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2980 replaceOperand(*II, 0, A);
2981 replaceOperand(*II, 1, B);
2982 return II;
2983 }
2984
2985 Value *Op0 = II->getOperand(0);
2986 Value *Op1 = II->getOperand(1);
2987 Value *OpNotNeg, *NegatedOp;
2988 unsigned NegatedOpArg, OtherOpArg;
2989 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2990 NegatedOp = Op0;
2991 NegatedOpArg = 0;
2992 OtherOpArg = 1;
2993 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2994 NegatedOp = Op1;
2995 NegatedOpArg = 1;
2996 OtherOpArg = 0;
2997 } else
2998 // Multiplication doesn't have a negated operand.
2999 break;
3000
3001 // Only optimize if the negated operand has only one use.
3002 if (!NegatedOp->hasOneUse())
3003 break;
3004
3005 Value *OtherOp = II->getOperand(OtherOpArg);
3006 VectorType *RetTy = cast<VectorType>(II->getType());
3007 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3008 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3009 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3010 ElementCount OtherCount = OtherOpTy->getElementCount();
3011 ElementCount RetCount = RetTy->getElementCount();
3012 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3013 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3014 ElementCount::isKnownLT(OtherCount, RetCount)) {
3015 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3016 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3017 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3018 return II;
3019 }
3020 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3021 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3022 SmallVector<Value *, 5> NewArgs(II->args());
3023 NewArgs[NegatedOpArg] = OpNotNeg;
3024 Instruction *NewMul =
3025 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3026 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3027 }
3028 break;
3029 }
3030 case Intrinsic::fmuladd: {
3031 // Try to simplify the underlying FMul.
3032 if (Value *V =
3033 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3034 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3035 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3036 II->getFastMathFlags());
3037
3038 [[fallthrough]];
3039 }
3040 case Intrinsic::fma: {
3041 // fma fneg(x), fneg(y), z -> fma x, y, z
3042 Value *Src0 = II->getArgOperand(0);
3043 Value *Src1 = II->getArgOperand(1);
3044 Value *Src2 = II->getArgOperand(2);
3045 Value *X, *Y;
3046 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3047 replaceOperand(*II, 0, X);
3048 replaceOperand(*II, 1, Y);
3049 return II;
3050 }
3051
3052 // fma fabs(x), fabs(x), z -> fma x, x, z
3053 if (match(Src0, m_FAbs(m_Value(X))) &&
3054 match(Src1, m_FAbs(m_Specific(X)))) {
3055 replaceOperand(*II, 0, X);
3056 replaceOperand(*II, 1, X);
3057 return II;
3058 }
3059
3060 // Try to simplify the underlying FMul. We can only apply simplifications
3061 // that do not require rounding.
3062 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3063 SQ.getWithInstruction(II)))
3064 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3065
3066 // fma x, y, 0 -> fmul x, y
3067 // This is always valid for -0.0, but requires nsz for +0.0 as
3068 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3069 if (match(Src2, m_NegZeroFP()) ||
3070 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3071 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3072
3073 // fma x, -1.0, y -> fsub y, x
3074 if (match(Src1, m_SpecificFP(-1.0)))
3075 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3076
3077 break;
3078 }
3079 case Intrinsic::copysign: {
3080 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3081 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3082 Sign, getSimplifyQuery().getWithInstruction(II))) {
3083 if (*KnownSignBit) {
3084 // If we know that the sign argument is negative, reduce to FNABS:
3085 // copysign Mag, -Sign --> fneg (fabs Mag)
3086 Value *Fabs = Builder.CreateFAbs(Mag, II);
3087 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3088 }
3089
3090 // If we know that the sign argument is positive, reduce to FABS:
3091 // copysign Mag, +Sign --> fabs Mag
3092 Value *Fabs = Builder.CreateFAbs(Mag, II);
3093 return replaceInstUsesWith(*II, Fabs);
3094 }
3095
3096 // Propagate sign argument through nested calls:
3097 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3098 Value *X;
3100 Value *CopySign =
3101 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3102 return replaceInstUsesWith(*II, CopySign);
3103 }
3104
3105 // Clear sign-bit of constant magnitude:
3106 // copysign -MagC, X --> copysign MagC, X
3107 // TODO: Support constant folding for fabs
3108 const APFloat *MagC;
3109 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3110 APFloat PosMagC = *MagC;
3111 PosMagC.clearSign();
3112 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3113 }
3114
3115 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3116 // copysign (fabs X), Sign --> copysign X, Sign
3117 // copysign (fneg X), Sign --> copysign X, Sign
3118 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3119 return replaceOperand(*II, 0, X);
3120
3121 Type *SignEltTy = Sign->getType()->getScalarType();
3122
3123 Value *CastSrc;
3124 if (match(Sign,
3126 CastSrc->getType()->isIntOrIntVectorTy() &&
3128 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3130 APInt::getSignMask(Known.getBitWidth()), Known,
3131 SQ))
3132 return II;
3133 }
3134
3135 break;
3136 }
3137 case Intrinsic::fabs: {
3138 Value *Cond, *TVal, *FVal;
3139 Value *Arg = II->getArgOperand(0);
3140 Value *X;
3141 // fabs (-X) --> fabs (X)
3142 if (match(Arg, m_FNeg(m_Value(X)))) {
3143 Value *Fabs = Builder.CreateFAbs(X, II);
3144 return replaceInstUsesWith(CI, Fabs);
3145 }
3146
3147 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3148 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3149 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3150 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3151 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3152 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3153 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3154 SI->setFastMathFlags(II->getFastMathFlags() |
3155 cast<SelectInst>(Arg)->getFastMathFlags());
3156 // Can't copy nsz to select, as even with the nsz flag the fabs result
3157 // always has the sign bit unset.
3158 SI->setHasNoSignedZeros(false);
3159 return SI;
3160 }
3161 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3162 if (match(TVal, m_FNeg(m_Specific(FVal))))
3163 return replaceOperand(*II, 0, FVal);
3164 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3165 if (match(FVal, m_FNeg(m_Specific(TVal))))
3166 return replaceOperand(*II, 0, TVal);
3167 }
3168
3169 Value *Magnitude, *Sign;
3170 if (match(II->getArgOperand(0),
3171 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3172 // fabs (copysign x, y) -> (fabs x)
3173 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3174 return replaceInstUsesWith(*II, AbsSign);
3175 }
3176
3177 [[fallthrough]];
3178 }
3179 case Intrinsic::ceil:
3180 case Intrinsic::floor:
3181 case Intrinsic::round:
3182 case Intrinsic::roundeven:
3183 case Intrinsic::nearbyint:
3184 case Intrinsic::rint:
3185 case Intrinsic::trunc: {
3186 Value *ExtSrc;
3187 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3188 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3189 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3190 return new FPExtInst(NarrowII, II->getType());
3191 }
3192 break;
3193 }
3194 case Intrinsic::cos:
3195 case Intrinsic::amdgcn_cos:
3196 case Intrinsic::cosh: {
3197 Value *X, *Sign;
3198 Value *Src = II->getArgOperand(0);
3199 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3200 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3201 // f(-x) --> f(x)
3202 // f(fabs(x)) --> f(x)
3203 // f(copysign(x, y)) --> f(x)
3204 // for f in {cos, cosh}
3205 return replaceOperand(*II, 0, X);
3206 }
3207 break;
3208 }
3209 case Intrinsic::sin:
3210 case Intrinsic::amdgcn_sin:
3211 case Intrinsic::sinh:
3212 case Intrinsic::tan:
3213 case Intrinsic::tanh: {
3214 Value *X;
3215 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3216 // f(-x) --> -f(x)
3217 // for f in {sin, sinh, tan, tanh}
3218 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3219 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3220 }
3221 break;
3222 }
3223 case Intrinsic::ldexp: {
3224 Value *Src = II->getArgOperand(0);
3225 Value *Exp = II->getArgOperand(1);
3226
3227 // ldexp(x, K) -> fmul x, 2^K
3228 uint64_t ConstExp;
3229 if (match(Exp, m_ConstantInt(ConstExp))) {
3230 const fltSemantics &FPTy =
3231 Src->getType()->getScalarType()->getFltSemantics();
3232
3233 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3235 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3236 // Skip overflow and underflow cases.
3237 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3238 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3239 }
3240 }
3241
3242 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3243 //
3244 // A danger is if the first ldexp would overflow to infinity or underflow to
3245 // zero, but the combined exponent avoids it.
3246 //
3247 // We ignore this with reassoc, or if we know both exponents have the same
3248 // sign (since then we'd just double down on the over/underflow which would
3249 // occur anyway).
3250 //
3251 // ldexp can take arbitrary integer types, so we also need to ensure that
3252 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3253 // then ldexp at the saturated exponent saturates to inf or zero as well.
3254 //
3255 // TODO: Could do better if we had range tracking for the input value
3256 // exponent. Also could broaden sign check to cover == 0 case.
3257 Value *InnerSrc;
3258 Value *InnerExp;
3260 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3261 Exp->getType() == InnerExp->getType()) {
3262 FastMathFlags FMF = II->getFastMathFlags();
3263 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3264
3265 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3266 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3267 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3268 Value *NewExp =
3269 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3270 II->setArgOperand(1, NewExp);
3271 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3272 return replaceOperand(*II, 0, InnerSrc);
3273 }
3274 }
3275
3276 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3277 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3278 Value *ExtSrc;
3279 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3280 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3281 Value *Select =
3282 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3283 ConstantFP::get(II->getType(), 1.0));
3285 }
3286 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3287 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3288 Value *Select =
3289 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3290 ConstantFP::get(II->getType(), 1.0));
3292 }
3293
3294 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3295 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3296 //
3297 // TODO: If we cared, should insert a canonicalize for x
3298 Value *SelectCond, *SelectLHS, *SelectRHS;
3299 if (match(II->getArgOperand(1),
3300 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3301 m_Value(SelectRHS))))) {
3302 Value *NewLdexp = nullptr;
3303 Value *Select = nullptr;
3304 if (match(SelectRHS, m_ZeroInt())) {
3305 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3306 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3307 } else if (match(SelectLHS, m_ZeroInt())) {
3308 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3309 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3310 }
3311
3312 if (NewLdexp) {
3313 Select->takeName(II);
3314 return replaceInstUsesWith(*II, Select);
3315 }
3316 }
3317
3318 break;
3319 }
3320 case Intrinsic::ptrauth_auth:
3321 case Intrinsic::ptrauth_resign: {
3322 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3323 // sign+auth component if the key and discriminator match.
3324 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3325 Value *Ptr = II->getArgOperand(0);
3326 Value *Key = II->getArgOperand(1);
3327 Value *Disc = II->getArgOperand(2);
3328 Value *DS = nullptr;
3329 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3330 DS = Bundle->Inputs[0];
3331
3332 // AuthKey will be the key we need to end up authenticating against in
3333 // whatever we replace this sequence with.
3334 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3335 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3336 Value *OtherDS = nullptr;
3337 if (auto Bundle =
3339 OtherDS = Bundle->Inputs[0];
3340 if (DS != OtherDS)
3341 break;
3342
3343 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3344 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3345 break;
3346 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3347 // The resign intrinsic does not support deactivation symbols.
3348 assert(!DS);
3349 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3350 break;
3351 AuthKey = CI->getArgOperand(1);
3352 AuthDisc = CI->getArgOperand(2);
3353 } else
3354 break;
3355 BasePtr = CI->getArgOperand(0);
3356 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3357 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3358 // our purposes, so check for that too.
3359 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3360 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3361 break;
3362
3363 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3364 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3365 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3366 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3367 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3368 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3369 SignDisc, /*AddrDisc=*/Null,
3370 /*DeactivationSymbol=*/Null);
3372 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3373 return eraseInstFromFunction(*II);
3374 }
3375
3376 // auth(ptrauth(p,k,d),k,d) -> p
3377 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3378 } else
3379 break;
3380
3381 unsigned NewIntrin;
3382 if (AuthKey && NeedSign) {
3383 // resign(0,1) + resign(1,2) = resign(0, 2)
3384 NewIntrin = Intrinsic::ptrauth_resign;
3385 } else if (AuthKey) {
3386 // resign(0,1) + auth(1) = auth(0)
3387 NewIntrin = Intrinsic::ptrauth_auth;
3388 } else if (NeedSign) {
3389 // sign(0) + resign(0, 1) = sign(1)
3390 NewIntrin = Intrinsic::ptrauth_sign;
3391 } else {
3392 // sign(0) + auth(0) = nop
3393 replaceInstUsesWith(*II, BasePtr);
3394 return eraseInstFromFunction(*II);
3395 }
3396
3397 SmallVector<Value *, 4> CallArgs;
3398 CallArgs.push_back(BasePtr);
3399 if (AuthKey) {
3400 CallArgs.push_back(AuthKey);
3401 CallArgs.push_back(AuthDisc);
3402 }
3403
3404 if (NeedSign) {
3405 CallArgs.push_back(II->getArgOperand(3));
3406 CallArgs.push_back(II->getArgOperand(4));
3407 }
3408
3409 std::vector<OperandBundleDef> Bundles;
3410 if (DS)
3411 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3412
3413 Function *NewFn =
3414 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3415 return CallInst::Create(NewFn, CallArgs, Bundles);
3416 }
3417 case Intrinsic::arm_neon_vtbl1:
3418 case Intrinsic::arm_neon_vtbl2:
3419 case Intrinsic::arm_neon_vtbl3:
3420 case Intrinsic::arm_neon_vtbl4:
3421 case Intrinsic::aarch64_neon_tbl1:
3422 case Intrinsic::aarch64_neon_tbl2:
3423 case Intrinsic::aarch64_neon_tbl3:
3424 case Intrinsic::aarch64_neon_tbl4:
3425 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3426 case Intrinsic::arm_neon_vtbx1:
3427 case Intrinsic::arm_neon_vtbx2:
3428 case Intrinsic::arm_neon_vtbx3:
3429 case Intrinsic::arm_neon_vtbx4:
3430 case Intrinsic::aarch64_neon_tbx1:
3431 case Intrinsic::aarch64_neon_tbx2:
3432 case Intrinsic::aarch64_neon_tbx3:
3433 case Intrinsic::aarch64_neon_tbx4:
3434 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3435
3436 case Intrinsic::arm_neon_vmulls:
3437 case Intrinsic::arm_neon_vmullu:
3438 case Intrinsic::aarch64_neon_smull:
3439 case Intrinsic::aarch64_neon_umull: {
3440 Value *Arg0 = II->getArgOperand(0);
3441 Value *Arg1 = II->getArgOperand(1);
3442
3443 // Handle mul by zero first:
3445 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3446 }
3447
3448 // Check for constant LHS & RHS - in this case we just simplify.
3449 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3450 IID == Intrinsic::aarch64_neon_umull);
3451 VectorType *NewVT = cast<VectorType>(II->getType());
3452 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3453 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3454 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3455 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3456 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3457 }
3458
3459 // Couldn't simplify - canonicalize constant to the RHS.
3460 std::swap(Arg0, Arg1);
3461 }
3462
3463 // Handle mul by one:
3464 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3465 if (ConstantInt *Splat =
3466 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3467 if (Splat->isOne())
3468 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3469 /*isSigned=*/!Zext);
3470
3471 break;
3472 }
3473 case Intrinsic::arm_neon_aesd:
3474 case Intrinsic::arm_neon_aese:
3475 case Intrinsic::aarch64_crypto_aesd:
3476 case Intrinsic::aarch64_crypto_aese:
3477 case Intrinsic::aarch64_sve_aesd:
3478 case Intrinsic::aarch64_sve_aese: {
3479 Value *DataArg = II->getArgOperand(0);
3480 Value *KeyArg = II->getArgOperand(1);
3481
3482 // Accept zero on either operand.
3483 if (!match(KeyArg, m_ZeroInt()))
3484 std::swap(KeyArg, DataArg);
3485
3486 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3487 Value *Data, *Key;
3488 if (match(KeyArg, m_ZeroInt()) &&
3489 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3490 replaceOperand(*II, 0, Data);
3491 replaceOperand(*II, 1, Key);
3492 return II;
3493 }
3494 break;
3495 }
3496 case Intrinsic::arm_neon_vshifts:
3497 case Intrinsic::arm_neon_vshiftu:
3498 case Intrinsic::aarch64_neon_sshl:
3499 case Intrinsic::aarch64_neon_ushl:
3500 return foldNeonShift(II, *this);
3501 case Intrinsic::hexagon_V6_vandvrt:
3502 case Intrinsic::hexagon_V6_vandvrt_128B: {
3503 // Simplify Q -> V -> Q conversion.
3504 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3505 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3506 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3507 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3508 break;
3509 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3510 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3511 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3512 // Check if every byte has common bits in Bytes and Mask.
3513 uint64_t C = Bytes1 & Mask1;
3514 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3515 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3516 }
3517 break;
3518 }
3519 case Intrinsic::stackrestore: {
3520 enum class ClassifyResult {
3521 None,
3522 Alloca,
3523 StackRestore,
3524 CallWithSideEffects,
3525 };
3526 auto Classify = [](const Instruction *I) {
3527 if (isa<AllocaInst>(I))
3528 return ClassifyResult::Alloca;
3529
3530 if (auto *CI = dyn_cast<CallInst>(I)) {
3531 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3532 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3533 return ClassifyResult::StackRestore;
3534
3535 if (II->mayHaveSideEffects())
3536 return ClassifyResult::CallWithSideEffects;
3537 } else {
3538 // Consider all non-intrinsic calls to be side effects
3539 return ClassifyResult::CallWithSideEffects;
3540 }
3541 }
3542
3543 return ClassifyResult::None;
3544 };
3545
3546 // If the stacksave and the stackrestore are in the same BB, and there is
3547 // no intervening call, alloca, or stackrestore of a different stacksave,
3548 // remove the restore. This can happen when variable allocas are DCE'd.
3549 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3550 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3551 SS->getParent() == II->getParent()) {
3552 BasicBlock::iterator BI(SS);
3553 bool CannotRemove = false;
3554 for (++BI; &*BI != II; ++BI) {
3555 switch (Classify(&*BI)) {
3556 case ClassifyResult::None:
3557 // So far so good, look at next instructions.
3558 break;
3559
3560 case ClassifyResult::StackRestore:
3561 // If we found an intervening stackrestore for a different
3562 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3563 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3564 CannotRemove = true;
3565 break;
3566
3567 case ClassifyResult::Alloca:
3568 case ClassifyResult::CallWithSideEffects:
3569 // If we found an alloca, a non-intrinsic call, or an intrinsic
3570 // call with side effects, we can't remove the stackrestore.
3571 CannotRemove = true;
3572 break;
3573 }
3574 if (CannotRemove)
3575 break;
3576 }
3577
3578 if (!CannotRemove)
3579 return eraseInstFromFunction(CI);
3580 }
3581 }
3582
3583 // Scan down this block to see if there is another stack restore in the
3584 // same block without an intervening call/alloca.
3586 Instruction *TI = II->getParent()->getTerminator();
3587 bool CannotRemove = false;
3588 for (++BI; &*BI != TI; ++BI) {
3589 switch (Classify(&*BI)) {
3590 case ClassifyResult::None:
3591 // So far so good, look at next instructions.
3592 break;
3593
3594 case ClassifyResult::StackRestore:
3595 // If there is a stackrestore below this one, remove this one.
3596 return eraseInstFromFunction(CI);
3597
3598 case ClassifyResult::Alloca:
3599 case ClassifyResult::CallWithSideEffects:
3600 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3601 // with side effects (such as llvm.stacksave and llvm.read_register),
3602 // we can't remove the stack restore.
3603 CannotRemove = true;
3604 break;
3605 }
3606 if (CannotRemove)
3607 break;
3608 }
3609
3610 // If the stack restore is in a return, resume, or unwind block and if there
3611 // are no allocas or calls between the restore and the return, nuke the
3612 // restore.
3613 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3614 return eraseInstFromFunction(CI);
3615 break;
3616 }
3617 case Intrinsic::lifetime_end:
3618 // Asan needs to poison memory to detect invalid access which is possible
3619 // even for empty lifetime range.
3620 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3621 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3622 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3623 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3624 break;
3625
3626 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3627 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3628 }))
3629 return nullptr;
3630 break;
3631 case Intrinsic::assume: {
3632 Value *IIOperand = II->getArgOperand(0);
3633
3634 // Canonicalize assume(a && b) -> assume(a); assume(b);
3635 // Note: New assumption intrinsics created here are registered by
3636 // the InstCombineIRInserter object.
3637 Value *A, *B;
3638 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3639 Builder.CreateAssumption(A);
3640 Builder.CreateAssumption(B);
3641 return eraseInstFromFunction(*II);
3642 }
3643 // assume(!(a || b)) -> assume(!a); assume(!b);
3644 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3645 Builder.CreateAssumption(Builder.CreateNot(A));
3646 Builder.CreateAssumption(Builder.CreateNot(B));
3647 return eraseInstFromFunction(*II);
3648 }
3649
3650 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3651 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3652
3653 // Separate storage assumptions apply to the underlying allocations, not
3654 // any particular pointer within them. When evaluating the hints for AA
3655 // purposes we getUnderlyingObject them; by precomputing the answers here
3656 // we can avoid having to do so repeatedly there.
3657 if (OBU.getTagName() == "separate_storage") {
3658 assert(OBU.Inputs.size() == 2);
3659 auto MaybeSimplifyHint = [&](const Use &U) {
3660 Value *Hint = U.get();
3661 // Not having a limit is safe because InstCombine removes unreachable
3662 // code.
3663 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3664 if (Hint != UnderlyingObject)
3665 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3666 };
3667 MaybeSimplifyHint(OBU.Inputs[0]);
3668 MaybeSimplifyHint(OBU.Inputs[1]);
3669 }
3670
3671 // Try to remove redundant alignment assumptions.
3672 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3674 *cast<AssumeInst>(II), II->arg_size() + Idx);
3675 if (!RK || RK.AttrKind != Attribute::Alignment ||
3677 continue;
3678
3679 // Remove align 1 bundles; they don't add any useful information.
3680 if (RK.ArgValue == 1)
3682
3683 // Don't try to remove align assumptions for pointers derived from
3684 // arguments. We might lose information if the function gets inlined and
3685 // the align argument attribute disappears.
3687 if (!UO || isa<Argument>(UO))
3688 continue;
3689
3690 // Compute known bits for the pointer and drop the assume if the
3691 // known alignment isn't increased by it.
3692 if ((1ULL << computeKnownBits(RK.WasOn, II).countMinTrailingZeros()) <
3693 RK.ArgValue)
3694 continue;
3696 }
3697
3698 if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
3700 *cast<AssumeInst>(II), II->arg_size() + Idx);
3701 if (!RK || RK.AttrKind != Attribute::NonNull)
3702 continue;
3703
3704 // Drop assume if we can prove nonnull without it
3705 if (isKnownNonZero(RK.WasOn, getSimplifyQuery().getWithInstruction(II)))
3707
3708 // Fold the assume into metadata if it's valid at the load
3709 if (auto *LI = dyn_cast<LoadInst>(RK.WasOn);
3710 LI &&
3711 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3712 MDNode *MD = MDNode::get(II->getContext(), {});
3713 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3714 LI->setMetadata(LLVMContext::MD_noundef, MD);
3716 }
3717
3718 // TODO: apply nonnull return attributes to calls and invokes
3719 }
3720 }
3721
3722 // Convert nonnull assume like:
3723 // %A = icmp ne i32* %PTR, null
3724 // call void @llvm.assume(i1 %A)
3725 // into
3726 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3727 if (match(IIOperand,
3729 A->getType()->isPointerTy()) {
3730 Builder.CreateNonnullAssumption(A);
3731 return eraseInstFromFunction(*II);
3732 }
3733
3734 // Convert alignment assume like:
3735 // %B = ptrtoint i32* %A to i64
3736 // %C = and i64 %B, Constant
3737 // %D = icmp eq i64 %C, 0
3738 // call void @llvm.assume(i1 %D)
3739 // into
3740 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3741 uint64_t AlignMask = 1;
3742 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3743 match(IIOperand,
3745 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3746 m_Zero())))) {
3747 if (isPowerOf2_64(AlignMask + 1)) {
3748 uint64_t Offset = 0;
3750 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3751 /// Note: this doesn't preserve the offset information but merges
3752 /// offset and alignment.
3753 /// TODO: we can generate a GEP instead of merging the alignment with
3754 /// the offset.
3755 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3756 MinAlign(Offset, AlignMask + 1));
3757 return eraseInstFromFunction(*II);
3758 }
3759 }
3760 }
3761
3762 /// Canonicalize Knowledge in operand bundles.
3763 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3764 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3765 auto &BOI = II->bundle_op_info_begin()[Idx];
3768 if (BOI.End - BOI.Begin > 2)
3769 continue; // Prevent reducing knowledge in an align with offset since
3770 // extracting a RetainedKnowledge from them loses offset
3771 // information
3772 RetainedKnowledge CanonRK =
3775 &getDominatorTree());
3776 if (CanonRK == RK)
3777 continue;
3778 if (!CanonRK) {
3779 if (BOI.End - BOI.Begin > 0) {
3780 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3781 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3782 }
3783 continue;
3784 }
3785 assert(RK.AttrKind == CanonRK.AttrKind);
3786 if (BOI.End - BOI.Begin > 0)
3787 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3788 if (BOI.End - BOI.Begin > 1)
3789 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3790 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3791 if (RK.WasOn)
3792 Worklist.pushValue(RK.WasOn);
3793 return II;
3794 }
3795 }
3796
3797 // If there is a dominating assume with the same condition as this one,
3798 // then this one is redundant, and should be removed.
3799 KnownBits Known(1);
3800 computeKnownBits(IIOperand, Known, II);
3802 return eraseInstFromFunction(*II);
3803
3804 // assume(false) is unreachable.
3805 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3807 return eraseInstFromFunction(*II);
3808 }
3809
3810 // Update the cache of affected values for this assumption (we might be
3811 // here because we just simplified the condition).
3812 AC.updateAffectedValues(cast<AssumeInst>(II));
3813 break;
3814 }
3815 case Intrinsic::experimental_guard: {
3816 // Is this guard followed by another guard? We scan forward over a small
3817 // fixed window of instructions to handle common cases with conditions
3818 // computed between guards.
3819 Instruction *NextInst = II->getNextNode();
3820 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3821 // Note: Using context-free form to avoid compile time blow up
3822 if (!isSafeToSpeculativelyExecute(NextInst))
3823 break;
3824 NextInst = NextInst->getNextNode();
3825 }
3826 Value *NextCond = nullptr;
3827 if (match(NextInst,
3829 Value *CurrCond = II->getArgOperand(0);
3830
3831 // Remove a guard that is immediately preceded by an identical guard.
3832 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3833 if (CurrCond != NextCond) {
3834 Instruction *MoveI = II->getNextNode();
3835 while (MoveI != NextInst) {
3836 auto *Temp = MoveI;
3837 MoveI = MoveI->getNextNode();
3838 Temp->moveBefore(II->getIterator());
3839 }
3840 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3841 }
3842 eraseInstFromFunction(*NextInst);
3843 return II;
3844 }
3845 break;
3846 }
3847 case Intrinsic::vector_insert: {
3848 Value *Vec = II->getArgOperand(0);
3849 Value *SubVec = II->getArgOperand(1);
3850 Value *Idx = II->getArgOperand(2);
3851 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3852 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3853 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3854
3855 // Only canonicalize if the destination vector, Vec, and SubVec are all
3856 // fixed vectors.
3857 if (DstTy && VecTy && SubVecTy) {
3858 unsigned DstNumElts = DstTy->getNumElements();
3859 unsigned VecNumElts = VecTy->getNumElements();
3860 unsigned SubVecNumElts = SubVecTy->getNumElements();
3861 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3862
3863 // An insert that entirely overwrites Vec with SubVec is a nop.
3864 if (VecNumElts == SubVecNumElts)
3865 return replaceInstUsesWith(CI, SubVec);
3866
3867 // Widen SubVec into a vector of the same width as Vec, since
3868 // shufflevector requires the two input vectors to be the same width.
3869 // Elements beyond the bounds of SubVec within the widened vector are
3870 // undefined.
3871 SmallVector<int, 8> WidenMask;
3872 unsigned i;
3873 for (i = 0; i != SubVecNumElts; ++i)
3874 WidenMask.push_back(i);
3875 for (; i != VecNumElts; ++i)
3876 WidenMask.push_back(PoisonMaskElem);
3877
3878 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3879
3881 for (unsigned i = 0; i != IdxN; ++i)
3882 Mask.push_back(i);
3883 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3884 Mask.push_back(i);
3885 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3886 Mask.push_back(i);
3887
3888 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3889 return replaceInstUsesWith(CI, Shuffle);
3890 }
3891 break;
3892 }
3893 case Intrinsic::vector_extract: {
3894 Value *Vec = II->getArgOperand(0);
3895 Value *Idx = II->getArgOperand(1);
3896
3897 Type *ReturnType = II->getType();
3898 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3899 // ExtractIdx)
3900 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3901 Value *InsertTuple, *InsertIdx, *InsertValue;
3903 m_Value(InsertValue),
3904 m_Value(InsertIdx))) &&
3905 InsertValue->getType() == ReturnType) {
3906 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3907 // Case where we get the same index right after setting it.
3908 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3909 // InsertValue
3910 if (ExtractIdx == Index)
3911 return replaceInstUsesWith(CI, InsertValue);
3912 // If we are getting a different index than what was set in the
3913 // insert.vector intrinsic. We can just set the input tuple to the one up
3914 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3915 // InsertIndex), ExtractIndex)
3916 // --> extract.vector(InsertTuple, ExtractIndex)
3917 else
3918 return replaceOperand(CI, 0, InsertTuple);
3919 }
3920
3921 ConstantInt *ALMUpperBound;
3923 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3924 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3925 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3926 unsigned ScaleFactor =
3927 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
3928 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3929 return replaceInstUsesWith(CI,
3930 ConstantVector::getNullValue(ReturnType));
3931 }
3932
3933 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3934 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3935
3936 if (DstTy && VecTy) {
3937 auto DstEltCnt = DstTy->getElementCount();
3938 auto VecEltCnt = VecTy->getElementCount();
3939 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3940
3941 // Extracting the entirety of Vec is a nop.
3942 if (DstEltCnt == VecTy->getElementCount()) {
3943 replaceInstUsesWith(CI, Vec);
3944 return eraseInstFromFunction(CI);
3945 }
3946
3947 // Only canonicalize to shufflevector if the destination vector and
3948 // Vec are fixed vectors.
3949 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3950 break;
3951
3953 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3954 Mask.push_back(IdxN + i);
3955
3956 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3957 return replaceInstUsesWith(CI, Shuffle);
3958 }
3959 break;
3960 }
3961 case Intrinsic::experimental_vp_reverse: {
3962 Value *X;
3963 Value *Vec = II->getArgOperand(0);
3964 Value *Mask = II->getArgOperand(1);
3965 if (!match(Mask, m_AllOnes()))
3966 break;
3967 Value *EVL = II->getArgOperand(2);
3968 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3969 // rev(unop rev(X)) --> unop X
3970 if (match(Vec,
3972 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3973 auto *OldUnOp = cast<UnaryOperator>(Vec);
3975 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3976 II->getIterator());
3977 return replaceInstUsesWith(CI, NewUnOp);
3978 }
3979 break;
3980 }
3981 case Intrinsic::vector_reduce_or:
3982 case Intrinsic::vector_reduce_and: {
3983 // Canonicalize logical or/and reductions:
3984 // Or reduction for i1 is represented as:
3985 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3986 // %res = cmp ne iReduxWidth %val, 0
3987 // And reduction for i1 is represented as:
3988 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3989 // %res = cmp eq iReduxWidth %val, 11111
3990 Value *Arg = II->getArgOperand(0);
3991 Value *Vect;
3992
3993 if (Value *NewOp =
3994 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3995 replaceUse(II->getOperandUse(0), NewOp);
3996 return II;
3997 }
3998
3999 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4000 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4001 if (FTy->getElementType() == Builder.getInt1Ty()) {
4002 Value *Res = Builder.CreateBitCast(
4003 Vect, Builder.getIntNTy(FTy->getNumElements()));
4004 if (IID == Intrinsic::vector_reduce_and) {
4005 Res = Builder.CreateICmpEQ(
4007 } else {
4008 assert(IID == Intrinsic::vector_reduce_or &&
4009 "Expected or reduction.");
4010 Res = Builder.CreateIsNotNull(Res);
4011 }
4012 if (Arg != Vect)
4013 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4014 II->getType());
4015 return replaceInstUsesWith(CI, Res);
4016 }
4017 }
4018 [[fallthrough]];
4019 }
4020 case Intrinsic::vector_reduce_add: {
4021 if (IID == Intrinsic::vector_reduce_add) {
4022 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4023 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4024 // Convert vector_reduce_add(SExt(<n x i1>)) to
4025 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4026 // Convert vector_reduce_add(<n x i1>) to
4027 // Trunc(ctpop(bitcast <n x i1> to in)).
4028 Value *Arg = II->getArgOperand(0);
4029 Value *Vect;
4030
4031 if (Value *NewOp =
4032 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4033 replaceUse(II->getOperandUse(0), NewOp);
4034 return II;
4035 }
4036
4037 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4038 if (Value *Splat = getSplatValue(Arg)) {
4039 ElementCount VecToReduceCount =
4040 cast<VectorType>(Arg->getType())->getElementCount();
4041 if (VecToReduceCount.isFixed()) {
4042 unsigned VectorSize = VecToReduceCount.getFixedValue();
4043 return BinaryOperator::CreateMul(
4044 Splat,
4045 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4046 /*ImplicitTrunc=*/true));
4047 }
4048 }
4049
4050 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4051 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4052 if (FTy->getElementType() == Builder.getInt1Ty()) {
4053 Value *V = Builder.CreateBitCast(
4054 Vect, Builder.getIntNTy(FTy->getNumElements()));
4055 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4056 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4057 if (Arg != Vect &&
4058 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4059 Res = Builder.CreateNeg(Res);
4060 return replaceInstUsesWith(CI, Res);
4061 }
4062 }
4063 }
4064 [[fallthrough]];
4065 }
4066 case Intrinsic::vector_reduce_xor: {
4067 if (IID == Intrinsic::vector_reduce_xor) {
4068 // Exclusive disjunction reduction over the vector with
4069 // (potentially-extended) i1 element type is actually a
4070 // (potentially-extended) arithmetic `add` reduction over the original
4071 // non-extended value:
4072 // vector_reduce_xor(?ext(<n x i1>))
4073 // -->
4074 // ?ext(vector_reduce_add(<n x i1>))
4075 Value *Arg = II->getArgOperand(0);
4076 Value *Vect;
4077
4078 if (Value *NewOp =
4079 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4080 replaceUse(II->getOperandUse(0), NewOp);
4081 return II;
4082 }
4083
4084 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4085 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4086 if (VTy->getElementType() == Builder.getInt1Ty()) {
4087 Value *Res = Builder.CreateAddReduce(Vect);
4088 if (Arg != Vect)
4089 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4090 II->getType());
4091 return replaceInstUsesWith(CI, Res);
4092 }
4093 }
4094 }
4095 [[fallthrough]];
4096 }
4097 case Intrinsic::vector_reduce_mul: {
4098 if (IID == Intrinsic::vector_reduce_mul) {
4099 Value *Arg = II->getArgOperand(0);
4100
4101 if (Value *NewOp =
4102 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4103 replaceUse(II->getOperandUse(0), NewOp);
4104 return II;
4105 }
4106
4107 // vector_reduce_mul(zext(<n x i1>)), or
4108 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4109 // zext(vector_reduce_and(<n x i1>)).
4110 // (The sext case doesn't work if n is odd because multiplying an odd
4111 // number of -1's produces -1, not 1.)
4112 Value *Vect;
4113 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4114 Vect->getType()->isIntOrIntVectorTy(1);
4115 bool IsSext =
4116 match(Arg, m_SExt(m_Value(Vect))) &&
4117 Vect->getType()->isIntOrIntVectorTy(1) &&
4118 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4119 if (IsZext || IsSext) {
4120 Value *Res = Builder.CreateAndReduce(Vect);
4121 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4122 }
4123
4124 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4125 if (Arg->getType()->isIntOrIntVectorTy(1))
4126 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4127 }
4128 [[fallthrough]];
4129 }
4130 case Intrinsic::vector_reduce_umin:
4131 case Intrinsic::vector_reduce_umax: {
4132 if (IID == Intrinsic::vector_reduce_umin ||
4133 IID == Intrinsic::vector_reduce_umax) {
4134 // UMin/UMax reduction over the vector with (potentially-extended)
4135 // i1 element type is actually a (potentially-extended)
4136 // logical `and`/`or` reduction over the original non-extended value:
4137 // vector_reduce_u{min,max}(?ext(<n x i1>))
4138 // -->
4139 // ?ext(vector_reduce_{and,or}(<n x i1>))
4140 Value *Arg = II->getArgOperand(0);
4141 Value *Vect;
4142
4143 if (Value *NewOp =
4144 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4145 replaceUse(II->getOperandUse(0), NewOp);
4146 return II;
4147 }
4148
4149 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4150 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4151 if (VTy->getElementType() == Builder.getInt1Ty()) {
4152 Value *Res = IID == Intrinsic::vector_reduce_umin
4153 ? Builder.CreateAndReduce(Vect)
4154 : Builder.CreateOrReduce(Vect);
4155 if (Arg != Vect)
4156 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4157 II->getType());
4158 return replaceInstUsesWith(CI, Res);
4159 }
4160 }
4161 }
4162 [[fallthrough]];
4163 }
4164 case Intrinsic::vector_reduce_smin:
4165 case Intrinsic::vector_reduce_smax: {
4166 if (IID == Intrinsic::vector_reduce_smin ||
4167 IID == Intrinsic::vector_reduce_smax) {
4168 // SMin/SMax reduction over the vector with (potentially-extended)
4169 // i1 element type is actually a (potentially-extended)
4170 // logical `and`/`or` reduction over the original non-extended value:
4171 // vector_reduce_s{min,max}(<n x i1>)
4172 // -->
4173 // vector_reduce_{or,and}(<n x i1>)
4174 // and
4175 // vector_reduce_s{min,max}(sext(<n x i1>))
4176 // -->
4177 // sext(vector_reduce_{or,and}(<n x i1>))
4178 // and
4179 // vector_reduce_s{min,max}(zext(<n x i1>))
4180 // -->
4181 // zext(vector_reduce_{and,or}(<n x i1>))
4182 Value *Arg = II->getArgOperand(0);
4183 Value *Vect;
4184
4185 if (Value *NewOp =
4186 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4187 replaceUse(II->getOperandUse(0), NewOp);
4188 return II;
4189 }
4190
4191 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4192 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4193 if (VTy->getElementType() == Builder.getInt1Ty()) {
4194 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4195 if (Arg != Vect)
4196 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4197 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4198 (ExtOpc == Instruction::CastOps::ZExt))
4199 ? Builder.CreateAndReduce(Vect)
4200 : Builder.CreateOrReduce(Vect);
4201 if (Arg != Vect)
4202 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4203 return replaceInstUsesWith(CI, Res);
4204 }
4205 }
4206 }
4207 [[fallthrough]];
4208 }
4209 case Intrinsic::vector_reduce_fmax:
4210 case Intrinsic::vector_reduce_fmin:
4211 case Intrinsic::vector_reduce_fadd:
4212 case Intrinsic::vector_reduce_fmul: {
4213 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4214 IID != Intrinsic::vector_reduce_fmul) ||
4215 II->hasAllowReassoc();
4216 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4217 IID == Intrinsic::vector_reduce_fmul)
4218 ? 1
4219 : 0;
4220 Value *Arg = II->getArgOperand(ArgIdx);
4221 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4222 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4223 return nullptr;
4224 }
4225 break;
4226 }
4227 case Intrinsic::is_fpclass: {
4228 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4229 return I;
4230 break;
4231 }
4232 case Intrinsic::threadlocal_address: {
4233 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4234 MaybeAlign Align = II->getRetAlign();
4235 if (MinAlign > Align.valueOrOne()) {
4236 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4237 return II;
4238 }
4239 break;
4240 }
4241 case Intrinsic::fptoui_sat:
4242 case Intrinsic::fptosi_sat:
4243 if (Instruction *I = foldItoFPtoI(*II))
4244 return I;
4245 break;
4246 case Intrinsic::frexp: {
4247 Value *X;
4248 // The first result is idempotent with the added complication of the struct
4249 // return, and the second result is zero because the value is already
4250 // normalized.
4251 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4253 X = Builder.CreateInsertValue(
4254 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4255 1);
4256 return replaceInstUsesWith(*II, X);
4257 }
4258 }
4259 break;
4260 }
4261 case Intrinsic::get_active_lane_mask: {
4262 const APInt *Op0, *Op1;
4263 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4264 match(II->getOperand(1), m_APInt(Op1))) {
4265 Type *OpTy = II->getOperand(0)->getType();
4266 return replaceInstUsesWith(
4267 *II, Builder.CreateIntrinsic(
4268 II->getType(), Intrinsic::get_active_lane_mask,
4269 {Constant::getNullValue(OpTy),
4270 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4271 }
4272 break;
4273 }
4274 case Intrinsic::experimental_get_vector_length: {
4275 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4276 unsigned BitWidth =
4277 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4278 II->getType()->getScalarSizeInBits());
4279 ConstantRange Cnt =
4280 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4281 SQ.getWithInstruction(II))
4283 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4284 ->getValue()
4285 .zextOrTrunc(Cnt.getBitWidth());
4286 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4287 MaxLanes = MaxLanes.multiply(
4288 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4289
4290 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4291 return replaceInstUsesWith(
4292 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4293 return nullptr;
4294 }
4295 default: {
4296 // Handle target specific intrinsics
4297 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4298 if (V)
4299 return *V;
4300 break;
4301 }
4302 }
4303
4304 // Try to fold intrinsic into select/phi operands. This is legal if:
4305 // * The intrinsic is speculatable.
4306 // * The operand is one of the following:
4307 // - a phi.
4308 // - a select with a scalar condition.
4309 // - a select with a vector condition and II is not a cross lane operation.
4311 for (Value *Op : II->args()) {
4312 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4313 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4314 if (IsVectorCond &&
4315 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4316 continue;
4317 // Don't replace a scalar select with a more expensive vector select if
4318 // we can't simplify both arms of the select.
4319 bool SimplifyBothArms =
4320 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4322 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4323 return R;
4324 }
4325 if (auto *Phi = dyn_cast<PHINode>(Op))
4326 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4327 return R;
4328 }
4329 }
4330
4332 return Shuf;
4333
4335 return replaceInstUsesWith(*II, Reverse);
4336
4338 return replaceInstUsesWith(*II, Res);
4339
4340 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4341 // context, so it is handled in visitCallBase and we should trigger it.
4342 return visitCallBase(*II);
4343}
4344
4345// Fence instruction simplification
// Erases the fence FI when a neighbouring fence already covers it:
//  * the next instruction is a fence identical to FI, or
//  * the next or previous instruction is a fence in the same sync scope
//    (System or SingleThread only) whose ordering is at least as strong.
// Returns the result of eraseInstFromFunction(FI) when FI is removed and
// nullptr when no adjacent fence makes it redundant.
// NOTE(review): the line carrying this function's signature (4346 in the
// original numbering) is not present in this listing.
4347 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4348 // This check is solely here to handle arbitrary target-dependent syncscopes.
4349 // TODO: Can remove if does not matter in practice.
4350 if (NFI && FI.isIdenticalTo(NFI))
4351 return eraseInstFromFunction(FI);
4352
4353 // Returns true if FI1 is identical or stronger fence than FI2.
4354 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4355 auto FI1SyncScope = FI1->getSyncScopeID();
4356 // Consider same scope, where scope is global or single-thread.
// Any other (target-specific) scope, or a scope mismatch, is rejected.
4357 if (FI1SyncScope != FI2->getSyncScopeID() ||
4358 (FI1SyncScope != SyncScope::System &&
4359 FI1SyncScope != SyncScope::SingleThread))
4360 return false;
4361
4362 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4363 };
// The following fence subsumes FI.
4364 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4365 return eraseInstFromFunction(FI);
4366
// The preceding instruction (if there is one and it is a fence) subsumes FI.
4367 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4368 if (isIdenticalOrStrongerFence(PFI, &FI))
4369 return eraseInstFromFunction(FI);
4370 return nullptr;
4371}
4372
4373// InvokeInst simplification
// Invokes get no dedicated handling here: the instruction is forwarded to
// visitCallBase and that result is returned unchanged.
// NOTE(review): the signature line (4374 in the original numbering) is not
// present in this listing.
4375 return visitCallBase(II);
4376}
4377
4378// CallBrInst simplification
// CallBr instructions get no dedicated handling here: the instruction is
// forwarded to visitCallBase and that result is returned unchanged.
// NOTE(review): the signature line (4379 in the original numbering) is not
// present in this listing.
4380 return visitCallBase(CBI);
4381}
4382
// Rewrites a call carrying the "modular-format" function attribute so that
// it targets the modular entry point named in the attribute and explicitly
// references only the implementation aspects the call actually needs.
//
// The attribute value is split on ','. Field 2 is parsed as a decimal
// argument index, field 3 is the replacement function name and field 4 is
// the implementation-name prefix used for aspect references.
//
// Returns the newly inserted call, or nullptr when the attribute is absent,
// the argument index is 0, there are no aspects, or every aspect is needed
// (in which case the rewrite would gain nothing).
//
// NOTE(review): the signature line (4383) and the declarations of `Args`
// (4387) and `AllAspects` (4399) are not present in this listing; `CI` is
// the call being rewritten and `B` is the builder used for insertion.
4384 if (!CI->hasFnAttr("modular-format"))
4385 return nullptr;
4386
4388 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4389 // TODO: Make use of the first two arguments
4390 unsigned FirstArgIdx;
4391 [[maybe_unused]] bool Error;
4392 Error = Args[2].getAsInteger(10, FirstArgIdx);
4393 assert(!Error && "invalid first arg index");
4394 if (FirstArgIdx == 0)
4395 return nullptr;
// Presumably the attribute index is one-based and is converted to a
// zero-based argument position here -- TODO confirm against the attribute
// documentation.
4396 --FirstArgIdx;
4397 StringRef FnName = Args[3];
4398 StringRef ImplName = Args[4];
4400
4401 if (AllAspects.empty())
4402 return nullptr;
4403
// Work out which of the listed aspects this particular call requires.
4404 SmallVector<StringRef> NeededAspects;
4405 for (StringRef Aspect : AllAspects) {
4406 if (Aspect == "float") {
// "float" is needed only if some argument at or after FirstArgIdx has a
// floating-point type.
4407 if (llvm::any_of(
4408 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4409 CI->arg_end()),
4410 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4411 NeededAspects.push_back("float");
4412 } else {
4413 // Unknown aspects are always considered to be needed.
4414 NeededAspects.push_back(Aspect);
4415 }
4416 }
4417
// Nothing could be dropped, so leave the original call alone.
4418 if (NeededAspects.size() == AllAspects.size())
4419 return nullptr;
4420
// Declare (or look up) FnName with the callee's type and its attributes
// minus "modular-format", then clone the call and retarget the clone.
4421 Module *M = CI->getModule();
4422 LLVMContext &Ctx = M->getContext();
4423 Function *Callee = CI->getCalledFunction();
4424 FunctionCallee ModularFn = M->getOrInsertFunction(
4425 FnName, Callee->getFunctionType(),
4426 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4427 CallInst *New = cast<CallInst>(CI->clone());
4428 New->setCalledFunction(ModularFn);
4429 New->removeFnAttr("modular-format");
4430 B.Insert(New);
4431
// Emits a call to the reloc_none intrinsic whose operand is the metadata
// string "<ImplName>_<Aspect>". Presumably this keeps the symbol for that
// aspect referenced so it gets linked in -- TODO confirm.
4432 const auto ReferenceAspect = [&](StringRef Aspect) {
4433 SmallString<20> Name = ImplName;
4434 Name += '_';
4435 Name += Aspect;
4436 Function *RelocNoneFn =
4437 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4438 B.CreateCall(RelocNoneFn,
4439 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4440 };
4441
// Sort so the references are emitted in a fixed order regardless of the
// order the aspects appeared in the attribute.
4442 llvm::sort(NeededAspects);
4443 for (StringRef Request : NeededAspects)
4444 ReferenceAspect(Request);
4445
4446 return New;
4447}
4448
/// Try to simplify the direct call \p CI, first through LibCallSimplifier and
/// then through optimizeModularFormat.
///
/// Indirect calls (no called function) and calls marked musttail or notail
/// are left untouched. On a successful simplification NumSimplified is
/// incremented; \p CI itself is returned when it has no uses, otherwise the
/// result of replacing its uses with the simplified value is returned.
/// Returns nullptr when neither simplifier produced a replacement.
4449Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4450 if (!CI->getCalledFunction()) return nullptr;
4451
4452 // Skip optimizing notail and musttail calls so
4453 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4454 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4455 if (CI->isMustTailCall() || CI->isNoTailCall())
4456 return nullptr;
4457
// Callbacks handed to LibCallSimplifier so that use replacement and erasure
// go through InstCombine rather than being done directly by the simplifier.
4458 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4459 replaceInstUsesWith(*From, With);
4460 };
// NOTE(review): the body of this lambda (line 4462 in the original
// numbering) is not present in this listing.
4461 auto InstCombineErase = [this](Instruction *I) {
4463 };
4464 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4465 InstCombineRAUW, InstCombineErase);
4466 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4467 ++NumSimplified;
4468 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4469 }
// Fall back to the "modular-format" rewrite when the library-call
// simplifier had nothing to offer.
4470 if (Value *With = optimizeModularFormat(CI, Builder)) {
4471 ++NumSimplified;
4472 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4473 }
4474
4475 return nullptr;
4476}
4477
// Given the trampoline memory TrampMem, return the unique init_trampoline
// intrinsic that initializes it, or nullptr. The search succeeds only when
// TrampMem is (a pointer cast of) an alloca, every user of TrampMem is an
// intrinsic that is either init_trampoline or adjust_trampoline, exactly one
// init_trampoline is among them, and that call's first operand is TrampMem.
// NOTE(review): the signature line (4478) and the line declaring `II` inside
// the loop (4490) are not present in this listing.
4479 // Strip off at most one level of pointer casts, looking for an alloca. This
4480 // is good enough in practice and simpler than handling any number of casts.
4481 Value *Underlying = TrampMem->stripPointerCasts();
// If a cast was stripped, the cast must be the only user of the underlying
// value; otherwise the memory could be touched through another path.
4482 if (Underlying != TrampMem &&
4483 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4484 return nullptr;
4485 if (!isa<AllocaInst>(Underlying))
4486 return nullptr;
4487
4488 IntrinsicInst *InitTrampoline = nullptr;
4489 for (User *U : TrampMem->users()) {
// A null II makes the whole search fail.
4491 if (!II)
4492 return nullptr;
4493 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4494 if (InitTrampoline)
4495 // More than one init_trampoline writes to this value. Give up.
4496 return nullptr;
4497 InitTrampoline = II;
4498 continue;
4499 }
4500 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4501 // Allow any number of calls to adjust.trampoline.
4502 continue;
// Any other intrinsic user is unexpected; give up.
4503 return nullptr;
4504 }
4505
4506 // No call to init.trampoline found.
4507 if (!InitTrampoline)
4508 return nullptr;
4509
4510 // Check that the alloca is being used in the expected way.
4511 if (InitTrampoline->getOperand(0) != TrampMem)
4512 return nullptr;
4513
4514 return InitTrampoline;
4515}
4516
4518 Value *TrampMem) {
// NOTE(review): the first signature line of this helper (listing line 4517)
// is missing from this extract. The body scans backwards from AdjustTramp
// within its basic block and returns the init.trampoline call whose operand 0
// is TrampMem, or nullptr if none is reached.
4519 // Visit all the previous instructions in the basic block, and try to find a
4520 // init.trampoline which has a direct path to the adjust.trampoline.
4521 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4522 E = AdjustTramp->getParent()->begin();
4523 I != E;) {
// Step to the previous instruction; the loop ends once the first instruction
// of the block has been examined.
4524 Instruction *Inst = &*--I;
// NOTE(review): the line that defines II (listing line 4525) is missing from
// this extract; presumably it is a dyn_cast of Inst to IntrinsicInst that
// guards the check below -- confirm.
4526 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4527 II->getOperand(0) == TrampMem)
4528 return II;
// Give up at the first preceding instruction that may write to memory.
4529 if (Inst->mayWriteToMemory())
4530 return nullptr;
4531 }
4532 return nullptr;
4533}
4534
4535// Given a call to llvm.adjust.trampoline, find and return the corresponding
4536// call to llvm.init.trampoline if the call to the trampoline can be optimized
4537// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the signature line (listing line 4538) is missing from this
// extract; the caller in visitCallBase passes the called operand as Callee.
4539 Callee = Callee->stripPointerCasts();
// After stripping pointer casts the callee must itself be a call to
// llvm.adjust.trampoline.
4540 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4541 if (!AdjustTramp ||
4542 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4543 return nullptr;
4544
// Operand 0 of adjust.trampoline is the trampoline memory to search from.
4545 Value *TrampMem = AdjustTramp->getOperand(0);
4546
// NOTE(review): the condition guarding the next return (listing line 4547) is
// missing from this extract; presumably it tries findInitTrampolineFromAlloca
// first -- confirm.
4548 return IT;
// Otherwise fall back to a backwards scan of AdjustTramp's basic block.
4549 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4550 return IT;
4551 return nullptr;
4552}
4553
/// Fold a call whose callee is a no-op inttoptr of a ptrauth.sign or
/// ptrauth.resign intrinsic and which carries a "ptrauth" operand bundle.
///
/// When the bundle inputs match the intrinsic's operands (see the per-case
/// checks below), a new call is created from \p Call with the intrinsic's
/// operand 0 as callee and the adjusted bundle list. Returns that new call, or
/// nullptr if the pattern does not match.
4554Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4555 const Value *Callee = Call.getCalledOperand();
4556 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4557 if (!IPC || !IPC->isNoopCast(DL))
4558 return nullptr;
4559
4560 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4561 if (!II)
4562 return nullptr;
4563
4564 Intrinsic::ID IIID = II->getIntrinsicID();
4565 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4566 return nullptr;
4567
4568 // Isolate the ptrauth bundle from the others.
// All non-ptrauth bundles are copied into NewBundles unchanged.
// NOTE(review): the declaration of NewBundles (listing line 4570) is missing
// from this extract.
4569 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4571 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4572 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4573 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4574 PtrAuthBundleOrNone = Bundle;
4575 else
4576 NewBundles.emplace_back(Bundle);
4577 }
4578
// Without a ptrauth bundle on the call there is nothing to fold.
4579 if (!PtrAuthBundleOrNone)
4580 return nullptr;
4581
4582 Value *NewCallee = nullptr;
4583 switch (IIID) {
4584 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4585 // assuming the call bundle and the sign operands match.
4586 case Intrinsic::ptrauth_resign: {
4587 // Resign result key should match bundle.
4588 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4589 return nullptr;
4590 // Resign result discriminator should match bundle.
4591 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4592 return nullptr;
4593
4594 // Resign input (auth) key should also match: we can't change the key on
4595 // the new call we're generating, because we don't know what keys are valid.
4596 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4597 return nullptr;
4598
// The new call authenticates with the resign's input key and discriminator
// (operands 1 and 2) and calls the resign's input pointer (operand 0).
4599 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4600 NewBundles.emplace_back("ptrauth", NewBundleOps);
4601 NewCallee = II->getOperand(0);
4602 break;
4603 }
4604
4605 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4606 // assuming the call bundle and the sign operands match.
4607 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4608 case Intrinsic::ptrauth_sign: {
4609 // Sign key should match bundle.
4610 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4611 return nullptr;
4612 // Sign discriminator should match bundle.
4613 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4614 return nullptr;
// No ptrauth bundle is re-added here: the new call goes to the unsigned
// pointer (operand 0) directly.
4615 NewCallee = II->getOperand(0);
4616 break;
4617 }
4618 default:
4619 llvm_unreachable("unexpected intrinsic ID");
4620 }
4621
4622 if (!NewCallee)
4623 return nullptr;
4624
// Cast the new callee to the type of the original callee, then clone the call
// with the rewritten bundle list and redirect it.
4625 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4626 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4627 NewCall->setCalledOperand(NewCallee);
4628 return NewCall;
4629}
4630
/// Fold a call through a ptrauth constant into a direct call to the function
/// it is based on, provided the call's ptrauth bundle is known to be
/// compatible with the constant. Returns the new call, or nullptr if the
/// pattern does not match.
4631Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
// NOTE(review): the definition of CPA (listing line 4632) is missing from
// this extract; presumably the called operand cast to a ptrauth constant --
// confirm.
4633 if (!CPA)
4634 return nullptr;
4635
4636 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4637 // If the ptrauth constant isn't based on a function pointer, bail out.
4638 if (!CalleeF)
4639 return nullptr;
4640
4641 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
// NOTE(review): the definition of PAB (listing line 4642) is missing from
// this extract; presumably the call's ptrauth operand bundle -- confirm.
4643 if (!PAB)
4644 return nullptr;
4645
// Bundle input 0 is the key (required here to be a ConstantInt by the cast)
// and input 1 is the discriminator.
4646 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4647 Value *Discriminator = PAB->Inputs[1];
4648
4649 // If the bundle doesn't match, this is probably going to fail to auth.
4650 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4651 return nullptr;
4652
4653 // If the bundle matches the constant, proceed in making this a direct call.
// NOTE(review): the statement that creates NewCall (listing line 4654) is
// missing from this extract.
4655 NewCall->setCalledOperand(CalleeF);
4656 return NewCall;
4657}
4658
/// Annotate the return value of an allocation-like call \p Call with size and
/// alignment information obtained from getAllocSize / getAllocAlignment.
///
/// Only calls returning a pointer are considered. Returns true if the call's
/// attributes were (considered) changed, false otherwise.
4659bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4660 const TargetLibraryInfo *TLI) {
4661 // Note: We only handle cases which can't be driven from generic attributes
4662 // here. So, for example, nonnull and noalias (which are common properties
4663 // of some allocation functions) are expected to be handled via annotation
4664 // of the respective allocator declaration with generic attributes.
4665 bool Changed = false;
4666
4667 if (!Call.getType()->isPointerTy())
4668 return Changed;
4669
// A known, non-zero allocation size is turned into a dereferenceable-style
// return attribute. Changed is set only if the attribute kind was not already
// present on the return value.
4670 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4671 if (Size && *Size != 0) {
4672 // TODO: We really should just emit deref_or_null here and then
4673 // let the generic inference code combine that with nonnull.
4674 if (Call.hasRetAttr(Attribute::NonNull)) {
4675 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
// NOTE(review): the start of the statement that adds the attribute (listing
// line 4676) is missing from this extract.
4677 Call.getContext(), Size->getLimitedValue()));
4678 } else {
4679 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
// NOTE(review): the start of the statement that adds the attribute (listing
// line 4680) is missing from this extract.
4681 Call.getContext(), Size->getLimitedValue()));
4682 }
4683 }
4684
4685 // Add alignment attribute if alignment is a power of two constant.
4686 Value *Alignment = getAllocAlignment(&Call, TLI);
4687 if (!Alignment)
4688 return Changed;
4689
// The alignment must be a constant below Value::MaximumAlignment, a power of
// two, and strictly larger than the existing return alignment (or 1 if none).
4690 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4691 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4692 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4693 if (llvm::isPowerOf2_64(AlignmentVal)) {
4694 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4695 Align NewAlign = Align(AlignmentVal);
4696 if (NewAlign > ExistingAlign) {
// NOTE(review): the statements that apply NewAlign to the call (listing lines
// 4697-4698) are missing from this extract.
4699 Changed = true;
4700 }
4701 }
4702 }
4703 return Changed;
4704}
4705
4706/// Improvements for call, callbr and invoke instructions.
///
/// In order, this: annotates allocation sites; simplifies or infers nonnull on
/// pointer arguments; tries to fold a callee cast into the arguments; handles
/// convergent/calling-convention mismatches and null/undef callees; folds
/// trampoline and ptrauth callees; marks non-throwing inline asm; runs
/// tryOptimizeCall on CallInsts; forwards 'returned' arguments; drops stale
/// callee_type metadata and kcfi bundles on direct calls; handles removable
/// allocations; and shrinks the gc-live bundle of statepoints.
///
/// Returns a replacement instruction where a fold produced one, \p Call itself
/// if it was modified in place, or nullptr otherwise.
///
/// NOTE(review): this listing extract omits several source lines of this
/// function; each gap is marked below.
4707Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4708 bool Changed = annotateAnyAllocSite(Call, &TLI);
4709
4710 // Mark any parameters that are known to be non-null with the nonnull
4711 // attribute. This is helpful for inlining calls to functions with null
4712 // checks on their arguments.
4713 SmallVector<unsigned, 4> ArgNos;
4714 unsigned ArgNo = 0;
4715
4716 for (Value *V : Call.args()) {
4717 if (V->getType()->isPointerTy()) {
4718 // Simplify the nonnull operand if the parameter is known to be nonnull.
4719 // Otherwise, try to infer nonnull for it.
4720 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4721 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4722 (HasDereferenceable &&
// NOTE(review): the start of the second condition operand (listing line 4723)
// is missing from this extract.
4724 V->getType()->getPointerAddressSpace()))) {
4725 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4726 replaceOperand(Call, ArgNo, Res);
4727 Changed = true;
4728 }
4729 } else if (isKnownNonZero(V,
4730 getSimplifyQuery().getWithInstruction(&Call))) {
// Remember the argument index; the attribute is added in one batch below.
4731 ArgNos.push_back(ArgNo);
4732 }
4733 }
4734 ArgNo++;
4735 }
4736
4737 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4738
4739 if (!ArgNos.empty()) {
4740 AttributeList AS = Call.getAttributes();
4741 LLVMContext &Ctx = Call.getContext();
4742 AS = AS.addParamAttribute(Ctx, ArgNos,
4743 Attribute::get(Ctx, Attribute::NonNull));
4744 Call.setAttributes(AS);
4745 Changed = true;
4746 }
4747
4748 // If the callee is a pointer to a function, attempt to move any casts to the
4749 // arguments of the call/callbr/invoke.
// NOTE(review): the definition of Callee (listing line 4750) is missing from
// this extract.
// transformConstExprCastCall erases the original call itself when it
// succeeds, so there is nothing left to return in that case.
4751 Function *CalleeF = dyn_cast<Function>(Callee);
4752 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4753 transformConstExprCastCall(Call))
4754 return nullptr;
4755
4756 if (CalleeF) {
4757 // Remove the convergent attr on calls when the callee is not convergent.
4758 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4759 !CalleeF->isIntrinsic()) {
4760 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4761 << "\n");
// NOTE(review): the statement that actually drops the attribute (listing line
// 4762) is missing from this extract.
4763 return &Call;
4764 }
4765
4766 // If the call and callee calling conventions don't match, and neither one
4767 // of the calling conventions is compatible with C calling convention
4768 // this call must be unreachable, as the call is undefined.
4769 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4770 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
// NOTE(review): the remainder of this condition (listing lines 4771-4773) is
// missing from this extract.
4774 // Only do this for calls to a function with a body. A prototype may
4775 // not actually end up matching the implementation's calling conv for a
4776 // variety of reasons (e.g. it may be written in assembly).
4777 !CalleeF->isDeclaration()) {
4778 Instruction *OldCall = &Call;
// NOTE(review): listing line 4779 is missing from this extract.
4780 // If OldCall does not return void then replaceInstUsesWith poison.
4781 // This allows ValueHandlers and custom metadata to adjust itself.
4782 if (!OldCall->getType()->isVoidTy())
4783 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4784 if (isa<CallInst>(OldCall))
4785 return eraseInstFromFunction(*OldCall);
4786
4787 // We cannot remove an invoke or a callbr, because it would change the
4788 // CFG, just change the callee to a null pointer.
4789 cast<CallBase>(OldCall)->setCalledFunction(
4790 CalleeF->getFunctionType(),
4791 Constant::getNullValue(CalleeF->getType()));
4792 return nullptr;
4793 }
4794 }
4795
4796 // Calling a null function pointer is undefined if a null address isn't
4797 // dereferenceable.
4798 if ((isa<ConstantPointerNull>(Callee) &&
// NOTE(review): the second half of the null-callee condition (listing line
// 4799) is missing from this extract.
4800 isa<UndefValue>(Callee)) {
4801 // If Call does not return void then replaceInstUsesWith poison.
4802 // This allows ValueHandlers and custom metadata to adjust itself.
4803 if (!Call.getType()->isVoidTy())
// NOTE(review): the replaceInstUsesWith statement (listing line 4804) is
// missing from this extract.
4805
4806 if (Call.isTerminator()) {
4807 // Can't remove an invoke or callbr because we cannot change the CFG.
4808 return nullptr;
4809 }
4810
4811 // This instruction is not reachable, just remove it.
// NOTE(review): the statements that remove the call (listing lines 4812-4813)
// are missing from this extract.
4814 }
4815
4816 if (IntrinsicInst *II = findInitTrampoline(Callee))
4817 return transformCallThroughTrampoline(Call, *II);
4818
4819 // Combine calls involving pointer authentication intrinsics.
4820 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4821 return NewCall;
4822
4823 // Combine calls to ptrauth constants.
4824 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4825 return NewCall;
4826
4827 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4828 InlineAsm *IA = cast<InlineAsm>(Callee);
4829 if (!IA->canThrow()) {
4830 // Normal inline asm calls cannot throw - mark them
4831 // 'nounwind'.
// NOTE(review): the statement that sets the attribute (listing line 4832) is
// missing from this extract.
4833 Changed = true;
4834 }
4835 }
4836
4837 // Try to optimize the call if possible, we require DataLayout for most of
4838 // this. None of these calls are seen as possibly dead so go ahead and
4839 // delete the instruction now.
4840 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4841 Instruction *I = tryOptimizeCall(CI);
4842 // If we changed something return the result, etc. Otherwise let
4843 // the fallthrough check.
4844 if (I) return eraseInstFromFunction(*I);
4845 }
4846
// A call with a 'returned' argument can have its uses replaced by that
// argument (cast to the call's type) when the cast is lossless.
4847 if (!Call.use_empty() && !Call.isMustTailCall())
4848 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4849 Type *CallTy = Call.getType();
4850 Type *RetArgTy = ReturnedArg->getType();
4851 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4852 return replaceInstUsesWith(
4853 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4854 }
4855
4856 // Drop unnecessary callee_type metadata from calls that were converted
4857 // into direct calls.
4858 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4859 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4860 Changed = true;
4861 }
4862
4863 // Drop unnecessary kcfi operand bundles from calls that were converted
4864 // into direct calls.
// NOTE(review): the definition of Bundle (listing line 4865) is missing from
// this extract.
4866 if (Bundle && !Call.isIndirectCall()) {
// Debug-only diagnostic: report when the callee's kcfi_type metadata differs
// from the type id carried by the bundle.
4867 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4868 if (CalleeF) {
4869 ConstantInt *FunctionType = nullptr;
4870 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4871
4872 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4873 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4874
4875 if (FunctionType &&
4876 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4877 dbgs() << Call.getModule()->getName()
4878 << ": warning: kcfi: " << Call.getCaller()->getName()
4879 << ": call to " << CalleeF->getName()
4880 << " using a mismatching function pointer type\n";
4881 }
4882 });
4883
// NOTE(review): the statement that removes the bundle (listing line 4884) is
// missing from this extract.
4885 }
4886
4887 if (isRemovableAlloc(&Call, &TLI))
4888 return visitAllocSite(Call);
4889
4890 // Handle intrinsics which can be used in both call and invoke context.
4891 switch (Call.getIntrinsicID()) {
4892 case Intrinsic::experimental_gc_statepoint: {
4893 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Collects the base and derived pointers that are still referenced by a
// surviving gc.relocate of this statepoint.
4894 SmallPtrSet<Value *, 32> LiveGcValues;
4895 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4896 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4897
4898 // Remove the relocation if unused.
4899 if (GCR.use_empty()) {
// NOTE(review): the erase statement (listing line 4900) is missing from this
// extract.
4901 continue;
4902 }
4903
4904 Value *DerivedPtr = GCR.getDerivedPtr();
4905 Value *BasePtr = GCR.getBasePtr();
4906
4907 // Undef is undef, even after relocation.
4908 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
// NOTE(review): the replacement statements (listing lines 4909-4910) are
// missing from this extract.
4911 continue;
4912 }
4913
4914 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4915 // The relocation of null will be null for most any collector.
4916 // TODO: provide a hook for this in GCStrategy. There might be some
4917 // weird collector this property does not hold for.
4918 if (isa<ConstantPointerNull>(DerivedPtr)) {
4919 // Use null-pointer of gc_relocate's type to replace it.
// NOTE(review): the replacement statements (listing lines 4920-4921) are
// missing from this extract.
4922 continue;
4923 }
4924
4925 // isKnownNonNull -> nonnull attribute
4926 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4927 isKnownNonZero(DerivedPtr,
4928 getSimplifyQuery().getWithInstruction(&Call))) {
4929 GCR.addRetAttr(Attribute::NonNull);
4930 // We discovered new fact, re-check users.
4931 Worklist.pushUsersToWorkList(GCR);
4932 }
4933 }
4934
4935 // If we have two copies of the same pointer in the statepoint argument
4936 // list, canonicalize to one. This may let us common gc.relocates.
4937 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4938 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4939 auto *OpIntTy = GCR.getOperand(2)->getType();
4940 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4941 }
4942
4943 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4944 // Canonicalize on the type from the uses to the defs
4945
4946 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4947 LiveGcValues.insert(BasePtr);
4948 LiveGcValues.insert(DerivedPtr);
4949 }
4950 std::optional<OperandBundleUse> Bundle =
// NOTE(review): the initializer of Bundle (listing line 4951) is missing from
// this extract; the code below treats it as the statepoint's gc-live bundle.
4952 unsigned NumOfGCLives = LiveGcValues.size();
// Nothing to shrink if there is no bundle or every input is still live.
4953 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4954 break;
4955 // We can reduce the size of gc live bundle.
// Val2Idx maps each distinct bundle input to its index in the new bundle;
// inputs that are no longer live get the sentinel index NumOfGCLives.
4956 DenseMap<Value *, unsigned> Val2Idx;
4957 std::vector<Value *> NewLiveGc;
4958 for (Value *V : Bundle->Inputs) {
4959 auto [It, Inserted] = Val2Idx.try_emplace(V);
4960 if (!Inserted)
4961 continue;
4962 if (LiveGcValues.count(V)) {
4963 It->second = NewLiveGc.size();
4964 NewLiveGc.push_back(V);
4965 } else
4966 It->second = NumOfGCLives;
4967 }
4968 // Update all gc.relocates
// Operands 1 and 2 of each gc.relocate are rewritten to the new base and
// derived indices.
4969 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4970 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4971 Value *BasePtr = GCR.getBasePtr();
4972 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4973 "Missed live gc for base pointer");
4974 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4975 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4976 Value *DerivedPtr = GCR.getDerivedPtr();
4977 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4978 "Missed live gc for derived pointer");
4979 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4980 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4981 }
4982 // Create new statepoint instruction.
4983 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4984 return CallBase::Create(&Call, NewBundle);
4985 }
4986 default: { break; }
4987 }
4988
4989 return Changed ? &Call : nullptr;
4990}
4991
4992/// If the callee is a constexpr cast of a function, attempt to move the cast to
4993/// the arguments of the call/invoke.
4994/// CallBrInst is not supported.
///
/// Returns true if the call was rewritten into a direct call; in that case
/// the original call has been replaced and erased. Returns false if any of the
/// legality checks below fails, leaving the call untouched.
///
/// NOTE(review): this listing extract omits several source lines of this
/// function; each gap is marked below.
4995bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4996 auto *Callee =
// NOTE(review): the initializer of Callee (listing line 4997) is missing from
// this extract.
4998 if (!Callee)
4999 return false;
5000
// NOTE(review): the start of this assert (listing line 5001) is missing from
// this extract.
5002 "CallBr's don't have a single point after a def to insert at");
5003
5004 // Don't perform the transform for declarations, which may not be fully
5005 // accurate. For example, void @foo() is commonly used as a placeholder for
5006 // unknown prototypes.
5007 if (Callee->isDeclaration())
5008 return false;
5009
5010 // If this is a call to a thunk function, don't remove the cast. Thunks are
5011 // used to transparently forward all incoming parameters and outgoing return
5012 // values, so it's important to leave the cast in place.
5013 if (Callee->hasFnAttribute("thunk"))
5014 return false;
5015
5016 // If this is a call to a naked function, the assembly might be
5017 // using an argument, or otherwise rely on the frame layout,
5018 // the function prototype will mismatch.
5019 if (Callee->hasFnAttribute(Attribute::Naked))
5020 return false;
5021
5022 // If this is a musttail call, the callee's prototype must match the caller's
5023 // prototype with the exception of pointee types. The code below doesn't
5024 // implement that, so we can't do this transform.
5025 // TODO: Do the transform if it only requires adding pointer casts.
5026 if (Call.isMustTailCall())
5027 return false;
5028
// NOTE(review): the definition of Caller (listing line 5029) is missing from
// this extract; the code below uses it as the call instruction being
// rewritten.
5030 const AttributeList &CallerPAL = Call.getAttributes();
5031
5032 // Okay, this is a cast from a function to a different type. Unless doing so
5033 // would cause a type conversion of one of our arguments, change this call to
5034 // be a direct call with arguments casted to the appropriate types.
5035 FunctionType *FT = Callee->getFunctionType();
5036 Type *OldRetTy = Caller->getType();
5037 Type *NewRetTy = FT->getReturnType();
5038
5039 // Check to see if we are changing the return type...
5040 if (OldRetTy != NewRetTy) {
5041
5042 if (NewRetTy->isStructTy())
5043 return false; // TODO: Handle multiple return values.
5044
// A non-castable return type is only acceptable when the result is unused.
5045 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5046 if (!Caller->use_empty())
5047 return false; // Cannot transform this return value.
5048 }
5049
5050 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5051 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5052 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5053 NewRetTy, CallerPAL.getRetAttrs())))
5054 return false; // Attribute not compatible with transformed value.
5055 }
5056
5057 // If the callbase is an invoke instruction, and the return value is
5058 // used by a PHI node in a successor, we cannot change the return type of
5059 // the call because there is no place to put the cast instruction (without
5060 // breaking the critical edge). Bail out in this case.
5061 if (!Caller->use_empty()) {
5062 BasicBlock *PhisNotSupportedBlock = nullptr;
5063 if (auto *II = dyn_cast<InvokeInst>(Caller))
5064 PhisNotSupportedBlock = II->getNormalDest();
5065 if (PhisNotSupportedBlock)
5066 for (User *U : Caller->users())
5067 if (PHINode *PN = dyn_cast<PHINode>(U))
5068 if (PN->getParent() == PhisNotSupportedBlock)
5069 return false;
5070 }
5071 }
5072
// Arguments present both at the call site and in the callee's prototype.
5073 unsigned NumActualArgs = Call.arg_size();
5074 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5075
5076 // Prevent us turning:
5077 // declare void @takes_i32_inalloca(i32* inalloca)
5078 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5079 //
5080 // into:
5081 // call void @takes_i32_inalloca(i32* null)
5082 //
5083 // Similarly, avoid folding away bitcasts of byval calls.
// NOTE(review): the check right below only tests inalloca/preallocated on the
// callee; the byval mismatch is rejected per argument in the loop further
// down.
5084 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5085 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5086 return false;
5087
// Legality check for every common argument: its type must be castable to the
// parameter type and its call-site attributes must survive the change.
5088 auto AI = Call.arg_begin();
5089 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5090 Type *ParamTy = FT->getParamType(i);
5091 Type *ActTy = (*AI)->getType();
5092
5093 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5094 return false; // Cannot transform this parameter value.
5095
5096 // Check if there are any incompatible attributes we cannot drop safely.
5097 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5098 .overlaps(AttributeFuncs::typeIncompatible(
5099 ParamTy, CallerPAL.getParamAttrs(i),
5100 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5101 return false; // Attribute not compatible with transformed value.
5102
5103 if (Call.isInAllocaArgument(i) ||
5104 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5105 return false; // Cannot transform to and from inalloca/preallocated.
5106
5107 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5108 return false;
5109
5110 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5111 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5112 return false; // Cannot transform to or from byval.
5113 }
5114
5115 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5116 !CallerPAL.isEmpty()) {
5117 // In this case we have more arguments than the new function type, but we
5118 // won't be dropping them. Check that these extra arguments have attributes
5119 // that are compatible with being a vararg call argument.
// Concretely: an sret attribute must not land on one of the variadic
// arguments.
5120 unsigned SRetIdx;
5121 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5122 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5123 return false;
5124 }
5125
5126 // Okay, we decided that this is a safe thing to do: go ahead and start
5127 // inserting cast instructions as necessary.
5128 SmallVector<Value *, 8> Args;
// NOTE(review): the declaration of ArgAttrs (listing line 5129) is missing
// from this extract.
5130 Args.reserve(NumActualArgs);
5131 ArgAttrs.reserve(NumActualArgs);
5132
5133 // Get any return attributes.
5134 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5135
5136 // If the return value is not being used, the type may not be compatible
5137 // with the existing attributes. Wipe out any problematic attributes.
5138 RAttrs.remove(
5139 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5140
5141 LLVMContext &Ctx = Call.getContext();
5142 AI = Call.arg_begin();
5143 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5144 Type *ParamTy = FT->getParamType(i);
5145
5146 Value *NewArg = *AI;
5147 if ((*AI)->getType() != ParamTy)
5148 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5149 Args.push_back(NewArg);
5150
5151 // Add any parameter attributes except the ones incompatible with the new
5152 // type. Note that we made sure all incompatible ones are safe to drop.
5153 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5154 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5155 ArgAttrs.push_back(
5156 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5157 }
5158
5159 // If the function takes more arguments than the call was taking, add them
5160 // now.
// The missing arguments are filled with null values of the parameter type.
5161 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5162 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5163 ArgAttrs.push_back(AttributeSet());
5164 }
5165
5166 // If we are removing arguments to the function, emit an obnoxious warning.
// NOTE(review): no warning is emitted by the code visible here; for a
// non-vararg callee the surplus call arguments are simply not forwarded.
5167 if (FT->getNumParams() < NumActualArgs) {
5168 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5169 if (FT->isVarArg()) {
5170 // Add all of the arguments in their promoted form to the arg list.
5171 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5172 Type *PTy = getPromotedType((*AI)->getType());
5173 Value *NewArg = *AI;
5174 if (PTy != (*AI)->getType()) {
5175 // Must promote to pass through va_arg area!
5176 Instruction::CastOps opcode =
5177 CastInst::getCastOpcode(*AI, false, PTy, false);
5178 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5179 }
5180 Args.push_back(NewArg);
5181
5182 // Add any parameter attributes.
5183 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5184 }
5185 }
5186 }
5187
5188 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5189
5190 if (NewRetTy->isVoidTy())
5191 Caller->setName(""); // Void type should not have a name.
5192
5193 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5194 "missing argument attributes");
5195 AttributeList NewCallerPAL = AttributeList::get(
5196 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5197
// NOTE(review): the declaration of OpBundles (listing line 5198) is missing
// from this extract.
5199 Call.getOperandBundlesAsDefs(OpBundles);
5200
// Build the direct call/invoke with the casted arguments, carrying over the
// operand bundles, name, attributes and (for calls) the tail-call kind.
5201 CallBase *NewCall;
5202 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5203 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5204 II->getUnwindDest(), Args, OpBundles);
5205 } else {
5206 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5207 cast<CallInst>(NewCall)->setTailCallKind(
5208 cast<CallInst>(Caller)->getTailCallKind());
5209 }
5210 NewCall->takeName(Caller);
// NOTE(review): listing line 5211 is missing from this extract.
5212 NewCall->setAttributes(NewCallerPAL);
5213
5214 // Preserve prof metadata if any.
5215 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5216
5217 // Insert a cast of the return type as necessary.
5218 Instruction *NC = NewCall;
5219 Value *NV = NC;
5220 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5221 assert(!NV->getType()->isVoidTy());
// NOTE(review): the statement that creates the cast and reassigns NV/NC
// (listing line 5222) is missing from this extract.
5223 NC->setDebugLoc(Caller->getDebugLoc());
5224
5225 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5226 assert(OptInsertPt && "No place to insert cast");
5227 InsertNewInstBefore(NC, *OptInsertPt);
5228 Worklist.pushUsersToWorkList(*Caller);
5229 }
5230
5231 if (!Caller->use_empty())
5232 replaceInstUsesWith(*Caller, NV);
5233 else if (Caller->hasValueHandle()) {
5234 if (OldRetTy == NV->getType())
// NOTE(review): the statement for the same-type case (listing line 5235) is
// missing from this extract.
5236 else
5237 // We cannot call ValueIsRAUWd with a different type, and the
5238 // actual tracked value will disappear.
// NOTE(review): the statement for the different-type case (listing line 5239)
// is missing from this extract.
5240 }
5241
5242 eraseInstFromFunction(*Caller);
5243 return true;
5244}
5245
5246/// Turn a call to a function created by init_trampoline / adjust_trampoline
5247/// intrinsic pair into a direct call to the underlying function.
///
/// \p Call is the call through the trampoline and \p Tramp is the matching
/// init.trampoline intrinsic. Returns nullptr if \p Call already carries a
/// 'nest' attribute. If the target function has a 'nest' parameter, returns a
/// newly created call/invoke/callbr with the chain value (argument operand 2
/// of \p Tramp) spliced in at that position. Otherwise retargets \p Call in
/// place and returns it.
///
/// NOTE(review): the return-type line of this definition (listing line 5248)
/// is missing from this extract.
5249InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5250 IntrinsicInst &Tramp) {
5251 FunctionType *FTy = Call.getFunctionType();
5252 AttributeList Attrs = Call.getAttributes();
5253
5254 // If the call already has the 'nest' attribute somewhere then give up -
5255 // otherwise 'nest' would occur twice after splicing in the chain.
5256 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5257 return nullptr;
5258
// NOTE(review): the definition of NestF (listing line 5259) is missing from
// this extract; the code below uses it as the function being called directly.
5260 FunctionType *NestFTy = NestF->getFunctionType();
5261
5262 AttributeList NestAttrs = NestF->getAttributes();
5263 if (!NestAttrs.isEmpty()) {
5264 unsigned NestArgNo = 0;
5265 Type *NestTy = nullptr;
5266 AttributeSet NestAttr;
5267
5268 // Look for a parameter marked with the 'nest' attribute.
5269 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5270 E = NestFTy->param_end();
5271 I != E; ++NestArgNo, ++I) {
5272 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5273 if (AS.hasAttribute(Attribute::Nest)) {
5274 // Record the parameter type and any other attributes.
5275 NestTy = *I;
5276 NestAttr = AS;
5277 break;
5278 }
5279 }
5280
5281 if (NestTy) {
5282 std::vector<Value*> NewArgs;
5283 std::vector<AttributeSet> NewArgAttrs;
// NOTE(review): NewArgAttrs also receives one extra entry for the nest
// argument, but only arg_size() slots are reserved for it (harmless).
5284 NewArgs.reserve(Call.arg_size() + 1);
5285 NewArgAttrs.reserve(Call.arg_size());
5286
5287 // Insert the nest argument into the call argument list, which may
5288 // mean appending it. Likewise for attributes.
5289
5290 {
5291 unsigned ArgNo = 0;
5292 auto I = Call.arg_begin(), E = Call.arg_end();
// The nest check runs before the end-of-arguments check so that the chain can
// also be appended after the last original argument.
5293 do {
5294 if (ArgNo == NestArgNo) {
5295 // Add the chain argument and attributes.
5296 Value *NestVal = Tramp.getArgOperand(2);
5297 if (NestVal->getType() != NestTy)
5298 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5299 NewArgs.push_back(NestVal);
5300 NewArgAttrs.push_back(NestAttr);
5301 }
5302
5303 if (I == E)
5304 break;
5305
5306 // Add the original argument and attributes.
5307 NewArgs.push_back(*I);
5308 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5309
5310 ++ArgNo;
5311 ++I;
5312 } while (true);
5313 }
5314
5315 // The trampoline may have been bitcast to a bogus type (FTy).
5316 // Handle this by synthesizing a new function type, equal to FTy
5317 // with the chain parameter inserted.
5318
5319 std::vector<Type*> NewTypes;
5320 NewTypes.reserve(FTy->getNumParams()+1);
5321
5322 // Insert the chain's type into the list of parameter types, which may
5323 // mean appending it.
5324 {
5325 unsigned ArgNo = 0;
5326 FunctionType::param_iterator I = FTy->param_begin(),
5327 E = FTy->param_end();
5328
5329 do {
5330 if (ArgNo == NestArgNo)
5331 // Add the chain's type.
5332 NewTypes.push_back(NestTy);
5333
5334 if (I == E)
5335 break;
5336
5337 // Add the original type.
5338 NewTypes.push_back(*I);
5339
5340 ++ArgNo;
5341 ++I;
5342 } while (true);
5343 }
5344
5345 // Replace the trampoline call with a direct call. Let the generic
5346 // code sort out any function type mismatches.
5347 FunctionType *NewFTy =
5348 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5349 AttributeList NewPAL =
5350 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5351 Attrs.getRetAttrs(), NewArgAttrs);
5352
// NOTE(review): the declaration of OpBundles (listing line 5353) is missing
// from this extract.
5354 Call.getOperandBundlesAsDefs(OpBundles);
5355
// Create the same kind of instruction as the original (invoke, callbr or
// call), copying the calling convention, attributes and, for plain calls, the
// tail-call kind.
5356 Instruction *NewCaller;
5357 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5358 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5359 II->getUnwindDest(), NewArgs, OpBundles);
5360 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5361 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5362 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5363 NewCaller =
5364 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5365 CBI->getIndirectDests(), NewArgs, OpBundles);
5366 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5367 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5368 } else {
5369 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5370 cast<CallInst>(NewCaller)->setTailCallKind(
5371 cast<CallInst>(Call).getTailCallKind());
5372 cast<CallInst>(NewCaller)->setCallingConv(
5373 cast<CallInst>(Call).getCallingConv());
5374 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5375 }
5376 NewCaller->setDebugLoc(Call.getDebugLoc());
5377
5378 return NewCaller;
5379 }
5380 }
5381
5382 // Replace the trampoline call with a direct call. Since there is no 'nest'
5383 // parameter, there is no need to adjust the argument list. Let the generic
5384 // code sort out any function type mismatches.
5385 Call.setCalledFunction(FTy, NestF);
5386 return &Call;
5387}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
bool isNegative() const
Definition APFloat.h:1538
void clearSign()
Definition APFloat.h:1357
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isZero() const
Definition APFloat.h:1534
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1987
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2088
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2130
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2659
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2494
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2257
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1075
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1567
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1242
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1295
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:798
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:222
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:797
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
Canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1740
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1695
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1726
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1676
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:271
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1713
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1753
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const