LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
// This implementation ignores Opcode and always returns nullptr.
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
// NOTE(review): listing lines 61 and 63 are absent from this extract; `F` and
// `Node` used below are presumably introduced there - confirm against the
// full file.
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any return attribute still present after the removals above rejects the
// tail call.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
// NOTE(review): CallerAttrs is not modified between the check above and this
// one, so if hasAttributes() already reports ZExt/SExt this test looks
// unreachable - confirm whether it is kept deliberately.
82 // It's not safe to eliminate the sign / zero extension of the return value.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
// Walks ArgLocs/OutVals in parallel and returns true only if every
// register-located argument whose register is not clobbered according to
// CallerPreservedMask is passed the value it had on entry: the outgoing value
// must be a CopyFromReg (optionally wrapped in one AssertZext) of a register
// for which MRI.getLiveInPhysReg() yields that same location register.
// Arguments not in registers, or in clobbered registers, are ignored.
// NOTE(review): the first signature line (listing line 91, presumably
// declaring `MRI`) is absent from this extract.
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through a single AssertZext wrapper before testing for CopyFromReg.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
// NOTE(review): listing lines 120 (first signature line, presumably declaring
// `Call`) and 137 (start of the assertion whose message appears below) are
// absent from this extract.
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
121 unsigned ArgIdx) {
// Each flag mirrors the presence of the corresponding parameter attribute on
// argument ArgIdx of the call.
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
138 "multiple ABI attributes?");
// IndirectType is filled from whichever of byval / preallocated / inalloca /
// sret is present; if several were set, the last assignment below would win.
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// For byval only: fall back to the parameter alignment when no stack
// alignment was found above.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
// NOTE(review): listing lines 156, 165, 184, 195, 201, 210 and 211 are absent
// from this extract (among them, presumably, the declarations of `RetVT`,
// `Ops`, `Args` and `CLI`) - confirm against the full file.
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
// Without a caller-supplied chain, start from the DAG entry node.
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
// A non-null entry in OpsTypeOverrides takes precedence over the type derived
// from the operand's own value type.
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
// Each argument starts out as exactly one of sign- or zero-extended; the
// soften case below may clear both flags.
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
// The return value gets the same either/or extension treatment as the
// arguments.
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
// Chooses a sequence of value types covering Op.size() bytes and appends them
// to MemOps. Returns false when the early source/destination alignment test
// below fails, or when more than Limit operations would be needed; otherwise
// returns true. MemOps is appended to incrementally, so it may already hold
// entries when false is returned.
// NOTE(review): listing lines 217 (first signature line), 235, 262, 263, 266,
// 287 and 289 are absent from this extract.
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
222 Op.getSrcAlign() < Op.getDstAlign())
223 return false;
224
225 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
// MVT::Other from the hook above means no type was chosen; pick one here.
227 if (VT == MVT::Other) {
228 // Use the largest integer type whose alignment constraints are satisfied.
229 // We only need to check DstAlign here as SrcAlign is always greater or
230 // equal to DstAlign (or zero).
231 VT = MVT::LAST_INTEGER_VALUETYPE;
232 if (Op.isFixedDstAlign())
233 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
234 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
236 assert(VT.isInteger());
237
238 // Find the largest legal integer type.
239 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240 while (!isTypeLegal(LVT))
241 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
242 assert(LVT.isInteger());
243
244 // If the type we've chosen is larger than the largest legal integer type
245 // then use that instead.
246 if (VT.bitsGT(LVT))
247 VT = LVT;
248 }
249
250 unsigned NumMemOps = 0;
251 uint64_t Size = Op.size();
// Emit one type per iteration until all Size bytes are covered.
252 while (Size) {
253 unsigned VTSize = VT.getSizeInBits() / 8;
// The current type is wider than what is left: step down to a narrower type,
// or keep it as an overlapping access (see below).
254 while (VTSize > Size) {
255 // For now, only use non-vector loads / stores for the left-over pieces.
256 EVT NewVT = VT;
257 unsigned NewVTSize;
258
259 bool Found = false;
260 if (VT.isVector() || VT.isFloatingPoint()) {
261 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
264 Found = true;
265 else if (NewVT == MVT::i64 &&
267 isSafeMemOpType(MVT::f64)) {
268 // i64 is usually not legal on 32-bit targets, but f64 may be.
269 NewVT = MVT::f64;
270 Found = true;
271 }
272 }
273
// Otherwise step down through the simple value types until a safe one is
// found, stopping unconditionally at i8.
274 if (!Found) {
275 do {
276 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
277 if (NewVT == MVT::i8)
278 break;
279 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
280 }
281 NewVTSize = NewVT.getSizeInBits() / 8;
282
283 // If the new VT cannot cover all of the remaining bits, then consider
284 // issuing a (or a pair of) unaligned and overlapping load / store.
285 unsigned Fast;
286 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
288 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
290 Fast)
// Keep the wide type and treat it as covering exactly the remaining bytes.
291 VTSize = Size;
292 else {
293 VT = NewVT;
294 VTSize = NewVTSize;
295 }
296 }
297
298 if (++NumMemOps > Limit)
299 return false;
300
301 MemOps.push_back(VT);
302 Size -= VTSize;
303 }
304
305 return true;
306}
307
// NOTE(review): the first signature line (listing line 310) is absent from
// this extract.
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
// Convenience form: forwards to the chain-taking overload with a local,
// empty Chain whose updated value is discarded.
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
317 OldRHS, Chain);
318}
319
// Rewrites a floating-point comparison (VT one of f32/f64/f128/ppcf128) as one
// or two soft-float comparison libcalls. With a single libcall, on return
// NewLHS is the libcall result, NewRHS is the constant 0 of the libcall return
// type, and CCCode is the integer predicate to apply between them. With two
// libcalls (SETUEQ / SETONE), NewLHS holds the combined boolean and NewRHS is
// an empty SDValue. Chain is updated to the libcall chain(s).
// NOTE(review): listing lines 320 (first signature line), 423, 425, 435, 451
// and 460 are absent from this extract (among them, presumably, the
// declarations of `RetVT` and `CallOptions`).
321 SDValue &NewLHS, SDValue &NewRHS,
322 ISD::CondCode &CCCode,
323 const SDLoc &dl, const SDValue OldLHS,
324 const SDValue OldRHS,
325 SDValue &Chain,
326 bool IsSignaling) const {
327 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
328 // not supporting it. We can update this code when libgcc provides such
329 // functions.
330
331 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
332 && "Unsupported setcc type!");
333
334 // Expand into one or more soft-fp libcall(s).
// LC1 is always set by the switch; LC2 only for SETUEQ / SETONE.
// ShouldInvertCC records that the libcall's predicate must be inverted.
335 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336 bool ShouldInvertCC = false;
337 switch (CCCode) {
338 case ISD::SETEQ:
339 case ISD::SETOEQ:
340 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
342 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343 break;
344 case ISD::SETNE:
345 case ISD::SETUNE:
346 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347 (VT == MVT::f64) ? RTLIB::UNE_F64 :
348 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349 break;
350 case ISD::SETGE:
351 case ISD::SETOGE:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353 (VT == MVT::f64) ? RTLIB::OGE_F64 :
354 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355 break;
356 case ISD::SETLT:
357 case ISD::SETOLT:
358 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359 (VT == MVT::f64) ? RTLIB::OLT_F64 :
360 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361 break;
362 case ISD::SETLE:
363 case ISD::SETOLE:
364 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365 (VT == MVT::f64) ? RTLIB::OLE_F64 :
366 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367 break;
368 case ISD::SETGT:
369 case ISD::SETOGT:
370 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371 (VT == MVT::f64) ? RTLIB::OGT_F64 :
372 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373 break;
// SETO shares the unordered libcall with SETUO, with the predicate inverted.
374 case ISD::SETO:
375 ShouldInvertCC = true;
376 [[fallthrough]];
377 case ISD::SETUO:
378 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379 (VT == MVT::f64) ? RTLIB::UO_F64 :
380 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381 break;
382 case ISD::SETONE:
383 // SETONE = O && UNE
384 ShouldInvertCC = true;
385 [[fallthrough]];
// SETUEQ needs two libcalls (unordered, ordered-equal); their results are
// combined at the end of this function.
386 case ISD::SETUEQ:
387 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388 (VT == MVT::f64) ? RTLIB::UO_F64 :
389 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
392 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393 break;
394 default:
395 // Invert CC for unordered comparisons
// Each unordered predicate uses the ordered libcall of the opposite condition
// (ULT -> OGE, ULE -> OGT, UGT -> OLE, UGE -> OLT) with the result inverted.
396 ShouldInvertCC = true;
397 switch (CCCode) {
398 case ISD::SETULT:
399 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400 (VT == MVT::f64) ? RTLIB::OGE_F64 :
401 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402 break;
403 case ISD::SETULE:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405 (VT == MVT::f64) ? RTLIB::OGT_F64 :
406 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407 break;
408 case ISD::SETUGT:
409 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410 (VT == MVT::f64) ? RTLIB::OLE_F64 :
411 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412 break;
413 case ISD::SETUGE:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415 (VT == MVT::f64) ? RTLIB::OLT_F64 :
416 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417 break;
418 default: llvm_unreachable("Do not know how to soften this setcc!");
419 }
420 }
421
422 // Use the target specific return value for comparison lib calls.
424 SDValue Ops[2] = {NewLHS, NewRHS};
426 EVT OpsVT[2] = { OldLHS.getValueType(),
427 OldRHS.getValueType() };
428 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// NOTE(review): the LC1 libcall is emitted here, before LC1Impl is tested for
// RTLIB::Unsupported below - confirm that the makeLibCall overload used here
// handles an unavailable libcall acceptably.
429 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
430 NewLHS = Call.first;
431 NewRHS = DAG.getConstant(0, dl, RetVT);
432
433 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
434 if (LC1Impl == RTLIB::Unsupported) {
436 "no libcall available to soften floating-point compare");
437 }
438
439 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
440 if (ShouldInvertCC) {
441 assert(RetVT.isInteger());
442 CCCode = getSetCCInverse(CCCode, RetVT);
443 }
444
445 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446 // Update Chain.
447 Chain = Call.second;
448 } else {
449 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
450 if (LC2Impl == RTLIB::Unsupported) {
452 "no libcall available to soften floating-point compare");
453 }
454
455 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456 "unordered call should be simple boolean");
457
458 EVT SetCCVT =
459 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
461 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
462 DAG.getValueType(MVT::i1));
463 }
464
// Tmp is the setcc on the first libcall's result; the second libcall gets its
// own setcc, and the two are joined with AND when inverted, OR otherwise.
465 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
466 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
467 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
468 if (ShouldInvertCC)
469 CCCode = getSetCCInverse(CCCode, RetVT);
470 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
// Only when the caller supplied a chain are the two libcall chains merged.
471 if (Chain)
472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
473 Call2.second);
474 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
475 Tmp.getValueType(), Tmp, NewLHS);
// An empty NewRHS marks NewLHS as already holding the final boolean.
476 NewRHS = SDValue();
477 }
478}
479
// NOTE(review): listing lines 482, 484, 485 and 488 (the signature and every
// statement of this function) are absent from this extract; only the
// comments and the closing brace survive. Consult the full file.
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
483 // In non-pic modes, just use the address of a block.
486
487 // Otherwise, use a label difference.
489}
490
// Returns `Table` unchanged; DAG is not used in the visible lines.
// NOTE(review): the first signature line (listing line 491, presumably
// declaring `Table`) is absent from this extract.
492 SelectionDAG &DAG) const {
493 return Table;
494}
495
// NOTE(review): listing line 499 (the function name and, presumably, the
// declaration of `MF`) is absent from this extract.
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
498const MCExpr *
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
// The result is a symbol-reference expression on the symbol that
// MF->getJTISymbol() yields for table JTI.
502 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
// Builds an ISD::BRIND to Addr. The chain operand is `Value`, optionally
// replaced by a jump-table debug-info node for table JTI.
// NOTE(review): listing lines 505 (first signature line, presumably declaring
// `dl` and `Value`) and 510 (the condition guarding the debug-info node) are
// absent from this extract.
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
512 }
513 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
514}
515
// Returns false when the global behind GA is not assumed DSO-local, or when
// the position-independence test below fires; otherwise returns true.
// NOTE(review): listing lines 517 (function name and the declaration of `GA`)
// and 527 (the position-independence condition) are absent from this extract.
516bool
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
// NOTE(review): the first signature line (listing line 541, presumably
// declaring `Op`) is absent from this extract.
538/// If the specified instruction has a constant integer operand and there are
539/// bits set in that constant that are not demanded, then clear those bits and
540/// return true.
542 const APInt &DemandedBits,
543 const APInt &DemandedElts,
544 TargetLoweringOpt &TLO) const {
545 SDLoc DL(Op);
546 unsigned Opcode = Op.getOpcode();
547
548 // Early-out if we've ended up calling an undemanded node, leave this to
549 // constant folding.
550 if (DemandedBits.isZero() || DemandedElts.isZero())
551 return false;
552
553 // Do target-specific constant optimization.
// When the hook returns true, the result is whether TLO.New holds a node.
554 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555 return TLO.New.getNode();
556
557 // FIXME: ISD::SELECT, ISD::SELECT_CC
558 switch (Opcode) {
559 default:
560 break;
561 case ISD::XOR:
562 case ISD::AND:
563 case ISD::OR: {
// Only a non-opaque constant in operand 1 is considered.
564 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
565 if (!Op1C || Op1C->isOpaque())
566 return false;
567
568 // If this is a 'not' op, don't touch it because that's a canonical form.
569 const APInt &C = Op1C->getAPIntValue();
570 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
571 return false;
572
// C has bits set outside DemandedBits: rebuild the node with those bits
// cleared (DemandedBits & C), keeping the original node flags.
573 if (!C.isSubsetOf(DemandedBits)) {
574 EVT VT = Op.getValueType();
575 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
576 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
577 Op->getFlags());
578 return TLO.CombineTo(Op, NewOp);
579 }
580
581 break;
582 }
583 }
584
585 return false;
586}
587
// Convenience overload: builds a DemandedElts mask from Op's value type and
// forwards to the overload taking DemandedElts. For non-vector types the mask
// is a single set bit.
// NOTE(review): listing lines 588 (first signature line) and 593 (the mask
// used for vector types) are absent from this extract.
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
// NOTE(review): listing lines 602 (first signature line, presumably declaring
// `Op` and `BitWidth`) and 639 (the remainder of the `Flags` initializer) are
// absent from this extract.
598/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600/// but it could be generalized for targets with other types of implicit
601/// widening casts.
603 const APInt &DemandedBits,
604 TargetLoweringOpt &TLO) const {
605 assert(Op.getNumOperands() == 2 &&
606 "ShrinkDemandedOp only supports binary operators!");
607 assert(Op.getNode()->getNumValues() == 1 &&
608 "ShrinkDemandedOp only supports nodes with one result!");
609
610 EVT VT = Op.getValueType();
611 SelectionDAG &DAG = TLO.DAG;
612 SDLoc dl(Op);
613
614 // Early return, as this function cannot handle vector types.
615 if (VT.isVector())
616 return false;
617
618 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
619 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
620 "ShrinkDemandedOp only supports operands that have the same size!");
621
622 // Don't do this if the node has another user, which may require the
623 // full value.
624 if (!Op.getNode()->hasOneUse())
625 return false;
626
627 // Search for the smallest integer type with free casts to and from
628 // Op's type. For expedience, just check power-of-2 integer types.
// Candidate widths start at DemandedBits' active-bit count rounded up by
// llvm::bit_ceil and stay strictly below BitWidth.
629 unsigned DemandedSize = DemandedBits.getActiveBits();
630 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
631 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
632 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
633 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
634 // We found a type with free casts.
635
636 // If the operation has the 'disjoint' flag, then the
637 // operands on the new node are also disjoint.
638 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
640 unsigned Opcode = Op.getOpcode();
641 if (Opcode == ISD::PTRADD) {
642 // It isn't a ptradd anymore if it doesn't operate on the entire
643 // pointer.
644 Opcode = ISD::ADD;
645 }
// Truncate both operands, apply the operation in the narrow type, then
// any-extend the result back to VT.
646 SDValue X = DAG.getNode(
647 Opcode, dl, SmallVT,
648 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
649 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
650 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
652 return TLO.CombineTo(Op, Z);
653 }
654 }
655 return false;
656}
657
// DAG-combiner entry point: runs SimplifyDemandedBits on Op with a fresh
// TargetLoweringOpt and, when it reports a change, re-queues Op's node on the
// combiner worklist. Returns whether a simplification happened.
// NOTE(review): listing lines 658 (first signature line) and 668 (a statement
// inside the `if (Simplified)` block) are absent from this extract.
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(Op.getNode());
669 }
670 return Simplified;
671}
672
// DAG-combiner entry point taking an explicit DemandedElts mask: runs
// SimplifyDemandedBits with a fresh TargetLoweringOpt and, when it reports a
// change, re-queues Op's node on the combiner worklist. Returns whether a
// simplification happened.
// NOTE(review): listing lines 673 (first signature line) and 685 (a statement
// inside the `if (Simplified)` block) are absent from this extract.
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(Op.getNode());
686 }
687 return Simplified;
688}
689
// Convenience overload: derives DemandedElts from Op's value type and
// forwards to the overload that takes DemandedElts.
// NOTE(review): listing lines 690, 692 (signature lines) and 701 (the mask
// used for fixed-length vectors) are absent from this extract.
691 KnownBits &Known,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
// Looks for a value that can stand in for Op when only DemandedBits of the
// DemandedElts lanes are needed. The result is typically one of Op's
// operands, a bitcast of a simplified source, or UNDEF when nothing is
// demanded. An empty SDValue means no replacement was found; this includes
// the depth-limit and undef early-outs.
// NOTE(review): listing lines 708, 714, 743, 761, 779, 880-882, 888, 893,
// 899, 907-909, 925, 938, 953 and 983 are absent from this extract. They
// include the function name, several `case` labels and parts of some
// conditions.
707// TODO: Under what circumstances can we create nodes? Constant folding?
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
// Three layouts are handled: equal element widths, destination elements made
// of several source elements, and (little-endian only) source elements made
// of several destination elements.
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
742 if (NumSrcEltBits == NumDstEltBits)
744 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Accumulate (OR together) the demanded bits of every source-sized slice of a
// destination element.
750 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that map to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
760
762 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
763 return DAG.getBitcast(DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
// Destination element i sits at slot (i % Scale) inside source element
// (i / Scale).
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(DemandedBits, Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
780 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
781 return DAG.getBitcast(DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(0);
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(0);
808 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
819 return Op.getOperand(0);
820 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
821 return Op.getOperand(1);
822 break;
823 }
824 case ISD::ADD:
825 case ISD::MUL:
826 case ISD::SMIN:
827 case ISD::SMAX:
828 case ISD::UMIN:
829 case ISD::UMAX: {
// If one operand is the identity element for this opcode (as reported by
// DAG.isIdentityElement), the other operand can be used directly.
830 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(1),
831 DemandedElts, 1, Depth + 1))
832 return Op.getOperand(0);
833
834 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(0),
835 DemandedElts, 0, Depth + 1))
836 return Op.getOperand(1);
837 break;
838 }
839 case ISD::SHL: {
840 // If we are only demanding sign bits then we can use the shift source
841 // directly.
842 if (std::optional<unsigned> MaxSA =
843 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
844 SDValue Op0 = Op.getOperand(0);
845 unsigned ShAmt = *MaxSA;
846 unsigned NumSignBits =
847 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
// UpperDemandedBits counts the bits from the lowest demanded bit up to the
// top of the value.
848 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
849 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
850 return Op0;
851 }
852 break;
853 }
854 case ISD::SRL: {
855 // If we are only demanding sign bits then we can use the shift source
856 // directly.
857 if (std::optional<unsigned> MaxSA =
858 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
859 SDValue Op0 = Op.getOperand(0);
860 unsigned ShAmt = *MaxSA;
861 // Must already be signbits in DemandedBits bounds, and can't demand any
862 // shifted in zeroes.
863 if (DemandedBits.countl_zero() >= ShAmt) {
864 unsigned NumSignBits =
865 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
866 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
867 return Op0;
868 }
869 }
870 break;
871 }
872 case ISD::SETCC: {
873 SDValue Op0 = Op.getOperand(0);
874 SDValue Op1 = Op.getOperand(1);
875 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
876 // If (1) we only need the sign-bit, (2) the setcc operands are the same
877 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
878 // -1, we may be able to bypass the setcc.
879 if (DemandedBits.isSignMask() &&
883 // If we're testing X < 0, then this compare isn't needed - just use X!
884 // FIXME: We're limiting to integer types here, but this should also work
885 // if we don't care about FP signed-zero. The use of SETLT with FP means
886 // that we don't care about NaNs.
887 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
889 return Op0;
890 }
891 break;
892 }
894 // If none of the extended bits are demanded, eliminate the sextinreg.
895 SDValue Op0 = Op.getOperand(0);
896 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
897 unsigned ExBits = ExVT.getScalarSizeInBits();
898 if (DemandedBits.getActiveBits() <= ExBits &&
900 return Op0;
901 // If the input is already sign extended, just drop the extension.
902 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
903 if (NumSignBits >= (BitWidth - ExBits + 1))
904 return Op0;
905 break;
906 }
910 if (VT.isScalableVector())
911 return SDValue();
912
913 // If we only want the lowest element and none of extended bits, then we can
914 // return the bitcasted source vector.
915 SDValue Src = Op.getOperand(0);
916 EVT SrcVT = Src.getValueType();
917 EVT DstVT = Op.getValueType();
918 if (IsLE && DemandedElts == 1 &&
919 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
920 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
921 return DAG.getBitcast(DstVT, Src);
922 }
923 break;
924 }
926 if (VT.isScalableVector())
927 return SDValue();
928
929 // If we don't demand the inserted element, return the base vector.
930 SDValue Vec = Op.getOperand(0);
931 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
932 EVT VecVT = Vec.getValueType();
// Only a constant, in-range index is handled.
933 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
934 !DemandedElts[CIdx->getZExtValue()])
935 return Vec;
936 break;
937 }
939 if (VT.isScalableVector())
940 return SDValue();
941
942 SDValue Vec = Op.getOperand(0);
943 SDValue Sub = Op.getOperand(1);
944 uint64_t Idx = Op.getConstantOperandVal(2);
945 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
946 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
947 // If we don't demand the inserted subvector, return the base vector.
948 if (DemandedSubElts == 0)
949 return Vec;
950 break;
951 }
952 case ISD::VECTOR_SHUFFLE: {
954 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
955
956 // If all the demanded elts are from one operand and are inline,
957 // then we can use the operand directly.
958 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
959 for (unsigned i = 0; i != NumElts; ++i) {
960 int M = ShuffleMask[i];
// Negative mask entries and non-demanded lanes place no constraint.
961 if (M < 0 || !DemandedElts[i])
962 continue;
963 AllUndef = false;
964 IdentityLHS &= (M == (int)i);
// A mask value of NumElts + i selects lane i of the second operand.
965 IdentityRHS &= ((M - NumElts) == i);
966 }
967
968 if (AllUndef)
969 return DAG.getUNDEF(Op.getValueType());
970 if (IdentityLHS)
971 return Op.getOperand(0);
972 if (IdentityRHS)
973 return Op.getOperand(1);
974 break;
975 }
976 default:
977 // TODO: Probably okay to remove after audit; here to reduce change size
978 // in initial enablement patch for scalable vectors
979 if (VT.isScalableVector())
980 return SDValue();
981
// Opcodes at or above ISD::BUILTIN_OP_END are handed to a hook whose call is
// on the absent listing line 983.
982 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
984 Op, DemandedBits, DemandedElts, DAG, Depth))
985 return V;
986 break;
987 }
988 return SDValue();
989}
990
// Convenience overload: derives DemandedElts from Op's value type and
// forwards to the overload that takes DemandedElts.
// NOTE(review): listing lines 991, 992 (signature lines) and 999 (the mask
// used for fixed-length vectors) are absent from this extract.
993 unsigned Depth) const {
994 EVT VT = Op.getValueType();
995 // Since the number of lanes in a scalable vector is unknown at compile time,
996 // we track one bit which is implicitly broadcast to all lanes. This means
997 // that all lanes in a scalable vector are considered demanded.
998 APInt DemandedElts = VT.isFixedLengthVector()
1000 : APInt(1, 1);
1001 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1002 Depth);
1003}
1004
// Element-only variant: the caller supplies just the demanded-elements mask.
// Every bit of the scalar element type is treated as demanded, and the query
// is forwarded to the five-argument SimplifyMultipleUseDemandedBits.
// NOTE(review): the first line of this signature is not present in this
// listing - confirm the function name and return type against upstream.
1006 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1007 unsigned Depth) const {
// All-ones mask as wide as one scalar element of Op.
1008 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1009 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1010 Depth);
1011}
1012
1013// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
1014// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// Op must be an ISD::SRL or ISD::SRA node (asserted below). DemandedBits and
// DemandedElts describe which bits/elements of Op's result are used, and Depth
// is passed through to the known-bits and sign-bits queries.
//
// Returns the replacement value on success, or an empty SDValue() when the
// pattern does not match or the resulting AVG node would not be acceptable.
// NOTE(review): the first lines of this signature are not present in this
// listing - confirm the leading parameters (Op, TLO) against upstream.
1017 const TargetLowering &TLI,
1018 const APInt &DemandedBits,
1019 const APInt &DemandedElts, unsigned Depth) {
1020 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1021 "SRL or SRA node is required here!");
1022 // Is the right shift using an immediate value of 1?
1023 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1024 if (!N1C || !N1C->isOne())
1025 return SDValue();
1026
1027 // We are looking for an avgfloor
1028 // add(ext, ext)
1029 // or one of these as an avgceil
1030 // add(add(ext, ext), 1)
1031 // add(add(ext, 1), ext)
1032 // add(ext, add(ext, 1))
1033 SDValue Add = Op.getOperand(0);
1034 if (Add.getOpcode() != ISD::ADD)
1035 return SDValue();
1036
// Start by assuming the floor form: the two averaged values are the operands
// of the outer add. Add2 stays empty unless an inner add is matched below.
1037 SDValue ExtOpA = Add.getOperand(0);
1038 SDValue ExtOpB = Add.getOperand(1);
1039 SDValue Add2;
// Helper for the ceil forms. Op1/Op2 are the operands of an inner add A, and
// Op3 is the other operand of the outer add. If Op2 or Op3 is a constant (or
// constant splat) one, the two remaining values become ExtOpA/ExtOpB, A is
// remembered in Add2, and the helper returns true. Op1 is never tested for
// being the constant.
1040 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1041 ConstantSDNode *ConstOp;
1042 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1043 ConstOp->isOne()) {
1044 ExtOpA = Op1;
1045 ExtOpB = Op3;
1046 Add2 = A;
1047 return true;
1048 }
1049 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1050 ConstOp->isOne()) {
1051 ExtOpA = Op1;
1052 ExtOpB = Op2;
1053 Add2 = A;
1054 return true;
1055 }
1056 return false;
1057 };
// Try the inner-add match on the first outer operand, then on the second. The
// || short-circuits, so the second attempt only runs if the first fails.
1058 bool IsCeil =
1059 (ExtOpA.getOpcode() == ISD::ADD &&
1060 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1061 (ExtOpB.getOpcode() == ISD::ADD &&
1062 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1063
1064 // If the shift is signed (sra):
1065 // - Needs >= 2 sign bit for both operands.
1066 // - Needs >= 2 zero bits.
1067 // If the shift is unsigned (srl):
1068 // - Needs >= 1 zero bit for both operands.
1069 // - Needs 1 demanded bit zero and >= 2 sign bits.
1070 SelectionDAG &DAG = TLO.DAG;
1071 unsigned ShiftOpc = Op.getOpcode();
1072 bool IsSigned = false;
// Number of leading bits (NumZero or NumSigned, chosen in the switch below)
// that can be dropped when narrowing. Note this local shares its name with the
// KnownBits type.
1073 unsigned KnownBits;
1074 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1075 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// One less than the smaller of the two sign-bit counts.
1076 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1077 unsigned NumZeroA =
1078 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1079 unsigned NumZeroB =
1080 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
// Leading zero bits guaranteed on both operands.
1081 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1082
// Pick the unsigned or signed flavour. In both cases the unsigned form is
// tried first and is taken only when NumSigned < NumZero.
1083 switch (ShiftOpc) {
1084 default:
1085 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1086 case ISD::SRA: {
1087 if (NumZero >= 2 && NumSigned < NumZero) {
1088 IsSigned = false;
1089 KnownBits = NumZero;
1090 break;
1091 }
1092 if (NumSigned >= 1) {
1093 IsSigned = true;
1094 KnownBits = NumSigned;
1095 break;
1096 }
1097 return SDValue();
1098 }
1099 case ISD::SRL: {
1100 if (NumZero >= 1 && NumSigned < NumZero) {
1101 IsSigned = false;
1102 KnownBits = NumZero;
1103 break;
1104 }
// The signed form is only used for a logical shift when the sign bit of the
// result is not demanded.
1105 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1106 IsSigned = true;
1107 KnownBits = NumSigned;
1108 break;
1109 }
1110 return SDValue();
1111 }
1112 }
1113
1114 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1115 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1116
1117 // Find the smallest power-2 type that is legal for this vector size and
1118 // operation, given the original type size and the number of known sign/zero
1119 // bits.
1120 EVT VT = Op.getValueType();
// The narrowed element width is never taken below 8 bits and is rounded up to
// a power of two by bit_ceil.
1121 unsigned MinWidth =
1122 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1123 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the condition guarding this early return is not present in
// this listing - confirm it against upstream.
1125 return SDValue();
1126 if (VT.isVector())
1127 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1128 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1129 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1130 // larger type size to do the transform.
1131 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1132 return SDValue();
// Add2 is only set for the ceil forms; when it is empty only the outer add
// is checked for overflow.
1133 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1134 Add.getOperand(1)) &&
1135 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1136 Add2.getOperand(1))))
1137 NVT = VT;
1138 else
1139 return SDValue();
1140 }
1141
1142 // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
1143 // this is likely to stop other folds (reassociation, value tracking etc.)
1144 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1145 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1146 return SDValue();
1147
// Convert both inputs to NVT, build the AVG node there, then convert the
// result back to the original type. IsSigned is passed to every getExtOrTrunc
// call so all three conversions use the same signedness.
1148 SDLoc DL(Op);
1149 SDValue ResultAVG =
1150 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1151 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1152 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1153}
1154
1155/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1156/// result of Op are ever used downstream. If we can use this information to
1157/// simplify Op, create a new simplified DAG node and return true, returning the
1158/// original and new nodes in Old and New. Otherwise, analyze the expression and
1159/// return a mask of Known bits for the expression (used to simplify the
1160/// caller). The Known bits may only be accurate for those bits in the
1161/// OriginalDemandedBits and OriginalDemandedElts.
1163 SDValue Op, const APInt &OriginalDemandedBits,
1164 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1165 unsigned Depth, bool AssumeSingleUse) const {
1166 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1167 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1168 "Mask size mismatches value type size!");
1169
1170 // Don't know anything.
1171 Known = KnownBits(BitWidth);
1172
1173 EVT VT = Op.getValueType();
1174 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1175 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1176 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1177 "Unexpected vector size");
1178
1179 APInt DemandedBits = OriginalDemandedBits;
1180 APInt DemandedElts = OriginalDemandedElts;
1181 SDLoc dl(Op);
1182
1183 // Undef operand.
1184 if (Op.isUndef())
1185 return false;
1186
1187 // We can't simplify target constants.
1188 if (Op.getOpcode() == ISD::TargetConstant)
1189 return false;
1190
1191 if (Op.getOpcode() == ISD::Constant) {
1192 // We know all of the bits for a constant!
1193 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1194 return false;
1195 }
1196
1197 if (Op.getOpcode() == ISD::ConstantFP) {
1198 // We know all of the bits for a floating point constant!
1200 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1201 return false;
1202 }
1203
1204 // Other users may use these bits.
1205 bool HasMultiUse = false;
1206 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1208 // Limit search depth.
1209 return false;
1210 }
1211 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1213 DemandedElts = APInt::getAllOnes(NumElts);
1214 HasMultiUse = true;
1215 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1216 // Not demanding any bits/elts from Op.
1217 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1218 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1219 // Limit search depth.
1220 return false;
1221 }
1222
1223 KnownBits Known2;
1224 switch (Op.getOpcode()) {
1225 case ISD::SCALAR_TO_VECTOR: {
1226 if (VT.isScalableVector())
1227 return false;
1228 if (!DemandedElts[0])
1229 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1230
1231 KnownBits SrcKnown;
1232 SDValue Src = Op.getOperand(0);
1233 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1234 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1235 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1236 return true;
1237
1238 // Upper elements are undef, so only get the knownbits if we just demand
1239 // the bottom element.
1240 if (DemandedElts == 1)
1241 Known = SrcKnown.anyextOrTrunc(BitWidth);
1242 break;
1243 }
1244 case ISD::BUILD_VECTOR:
1245 // Collect the known bits that are shared by every demanded element.
1246 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1247 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1248 return false; // Don't fall through, will infinitely loop.
1249 case ISD::SPLAT_VECTOR: {
1250 SDValue Scl = Op.getOperand(0);
1251 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1252 KnownBits KnownScl;
1253 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1254 return true;
1255
1256 // Implicitly truncate the bits to match the official semantics of
1257 // SPLAT_VECTOR.
1258 Known = KnownScl.trunc(BitWidth);
1259 break;
1260 }
1261 case ISD::FREEZE: {
1262 SDValue N0 = Op.getOperand(0);
1264 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
1265 return TLO.CombineTo(Op, N0);
1266 break;
1267 }
1268 case ISD::LOAD: {
1269 auto *LD = cast<LoadSDNode>(Op);
1270 if (getTargetConstantFromLoad(LD)) {
1271 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1272 return false; // Don't fall through, will infinitely loop.
1273 }
1274 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1275 // If this is a ZEXTLoad and we are looking at the loaded value.
1276 EVT MemVT = LD->getMemoryVT();
1277 unsigned MemBits = MemVT.getScalarSizeInBits();
1278 Known.Zero.setBitsFrom(MemBits);
1279 return false; // Don't fall through, will infinitely loop.
1280 }
1281 break;
1282 }
1284 if (VT.isScalableVector())
1285 return false;
1286 SDValue Vec = Op.getOperand(0);
1287 SDValue Scl = Op.getOperand(1);
1288 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1289 EVT VecVT = Vec.getValueType();
1290
1291 // If index isn't constant, assume we need all vector elements AND the
1292 // inserted element.
1293 APInt DemandedVecElts(DemandedElts);
1294 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1295 unsigned Idx = CIdx->getZExtValue();
1296 DemandedVecElts.clearBit(Idx);
1297
1298 // Inserted element is not required.
1299 if (!DemandedElts[Idx])
1300 return TLO.CombineTo(Op, Vec);
1301 }
1302
1303 KnownBits KnownScl;
1304 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1305 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1306 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1307 return true;
1308
1309 Known = KnownScl.anyextOrTrunc(BitWidth);
1310
1311 KnownBits KnownVec;
1312 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1313 Depth + 1))
1314 return true;
1315
1316 if (!!DemandedVecElts)
1317 Known = Known.intersectWith(KnownVec);
1318
1319 return false;
1320 }
1321 case ISD::INSERT_SUBVECTOR: {
1322 if (VT.isScalableVector())
1323 return false;
1324 // Demand any elements from the subvector and the remainder from the src it's
1325 // inserted into.
1326 SDValue Src = Op.getOperand(0);
1327 SDValue Sub = Op.getOperand(1);
1328 uint64_t Idx = Op.getConstantOperandVal(2);
1329 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1330 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1331 APInt DemandedSrcElts = DemandedElts;
1332 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1333
1334 KnownBits KnownSub, KnownSrc;
1335 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1336 Depth + 1))
1337 return true;
1338 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1339 Depth + 1))
1340 return true;
1341
1342 Known.setAllConflict();
1343 if (!!DemandedSubElts)
1344 Known = Known.intersectWith(KnownSub);
1345 if (!!DemandedSrcElts)
1346 Known = Known.intersectWith(KnownSrc);
1347
1348 // Attempt to avoid multi-use src if we don't need anything from it.
1349 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1350 !DemandedSrcElts.isAllOnes()) {
1352 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1354 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1355 if (NewSub || NewSrc) {
1356 NewSub = NewSub ? NewSub : Sub;
1357 NewSrc = NewSrc ? NewSrc : Src;
1358 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1359 Op.getOperand(2));
1360 return TLO.CombineTo(Op, NewOp);
1361 }
1362 }
1363 break;
1364 }
1366 if (VT.isScalableVector())
1367 return false;
1368 // Offset the demanded elts by the subvector index.
1369 SDValue Src = Op.getOperand(0);
1370 if (Src.getValueType().isScalableVector())
1371 break;
1372 uint64_t Idx = Op.getConstantOperandVal(1);
1373 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1374 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1375
1376 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1377 Depth + 1))
1378 return true;
1379
1380 // Attempt to avoid multi-use src if we don't need anything from it.
1381 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1383 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1384 if (DemandedSrc) {
1385 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1386 Op.getOperand(1));
1387 return TLO.CombineTo(Op, NewOp);
1388 }
1389 }
1390 break;
1391 }
1392 case ISD::CONCAT_VECTORS: {
1393 if (VT.isScalableVector())
1394 return false;
1395 Known.setAllConflict();
1396 EVT SubVT = Op.getOperand(0).getValueType();
1397 unsigned NumSubVecs = Op.getNumOperands();
1398 unsigned NumSubElts = SubVT.getVectorNumElements();
1399 for (unsigned i = 0; i != NumSubVecs; ++i) {
1400 APInt DemandedSubElts =
1401 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1402 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1403 Known2, TLO, Depth + 1))
1404 return true;
1405 // Known bits are shared by every demanded subvector element.
1406 if (!!DemandedSubElts)
1407 Known = Known.intersectWith(Known2);
1408 }
1409 break;
1410 }
1411 case ISD::VECTOR_SHUFFLE: {
1412 assert(!VT.isScalableVector());
1413 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1414
1415 // Collect demanded elements from shuffle operands.
1416 APInt DemandedLHS, DemandedRHS;
1417 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1418 DemandedRHS))
1419 break;
1420
1421 if (!!DemandedLHS || !!DemandedRHS) {
1422 SDValue Op0 = Op.getOperand(0);
1423 SDValue Op1 = Op.getOperand(1);
1424
1425 Known.setAllConflict();
1426 if (!!DemandedLHS) {
1427 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1428 Depth + 1))
1429 return true;
1430 Known = Known.intersectWith(Known2);
1431 }
1432 if (!!DemandedRHS) {
1433 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1434 Depth + 1))
1435 return true;
1436 Known = Known.intersectWith(Known2);
1437 }
1438
1439 // Attempt to avoid multi-use ops if we don't need anything from them.
1441 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1443 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1444 if (DemandedOp0 || DemandedOp1) {
1445 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1446 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1447 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1448 return TLO.CombineTo(Op, NewOp);
1449 }
1450 }
1451 break;
1452 }
1453 case ISD::AND: {
1454 SDValue Op0 = Op.getOperand(0);
1455 SDValue Op1 = Op.getOperand(1);
1456
1457 // If the RHS is a constant, check to see if the LHS would be zero without
1458 // using the bits from the RHS. Below, we use knowledge about the RHS to
1459 // simplify the LHS, here we're using information from the LHS to simplify
1460 // the RHS.
1461 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1462 // Do not increment Depth here; that can cause an infinite loop.
1463 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1464 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1465 if ((LHSKnown.Zero & DemandedBits) ==
1466 (~RHSC->getAPIntValue() & DemandedBits))
1467 return TLO.CombineTo(Op, Op0);
1468
1469 // If any of the set bits in the RHS are known zero on the LHS, shrink
1470 // the constant.
1471 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1472 DemandedElts, TLO))
1473 return true;
1474
1475 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1476 // constant, but if this 'and' is only clearing bits that were just set by
1477 // the xor, then this 'and' can be eliminated by shrinking the mask of
1478 // the xor. For example, for a 32-bit X:
1479 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1480 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1481 LHSKnown.One == ~RHSC->getAPIntValue()) {
1482 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1483 return TLO.CombineTo(Op, Xor);
1484 }
1485 }
1486
1487 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1488 SDValue X, Y;
1489 if (sd_match(Op,
1490 m_And(m_Value(Y),
1492 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1493 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1494 return TLO.CombineTo(
1495 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1496 }
1497
1498 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1499 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1500 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1501 (Op0.getOperand(0).isUndef() ||
1503 Op0->hasOneUse()) {
1504 unsigned NumSubElts =
1506 unsigned SubIdx = Op0.getConstantOperandVal(2);
1507 APInt DemandedSub =
1508 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1509 KnownBits KnownSubMask =
1510 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1511 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1512 SDValue NewAnd =
1513 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1514 SDValue NewInsert =
1515 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1516 Op0.getOperand(1), Op0.getOperand(2));
1517 return TLO.CombineTo(Op, NewInsert);
1518 }
1519 }
1520
1521 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1522 Depth + 1))
1523 return true;
1524 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1525 Known2, TLO, Depth + 1))
1526 return true;
1527
1528 // If all of the demanded bits are known one on one side, return the other.
1529 // These bits cannot contribute to the result of the 'and'.
1530 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1531 return TLO.CombineTo(Op, Op0);
1532 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1533 return TLO.CombineTo(Op, Op1);
1534 // If all of the demanded bits in the inputs are known zeros, return zero.
1535 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1536 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1537 // If the RHS is a constant, see if we can simplify it.
1538 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1539 TLO))
1540 return true;
1541 // If the operation can be done in a smaller type, do so.
1543 return true;
1544
1545 // Attempt to avoid multi-use ops if we don't need anything from them.
1546 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1548 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1550 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1551 if (DemandedOp0 || DemandedOp1) {
1552 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1553 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1554 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1555 return TLO.CombineTo(Op, NewOp);
1556 }
1557 }
1558
1559 Known &= Known2;
1560 break;
1561 }
1562 case ISD::OR: {
1563 SDValue Op0 = Op.getOperand(0);
1564 SDValue Op1 = Op.getOperand(1);
1565 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1566 Depth + 1)) {
1567 Op->dropFlags(SDNodeFlags::Disjoint);
1568 return true;
1569 }
1570
1571 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1572 Known2, TLO, Depth + 1)) {
1573 Op->dropFlags(SDNodeFlags::Disjoint);
1574 return true;
1575 }
1576
1577 // If all of the demanded bits are known zero on one side, return the other.
1578 // These bits cannot contribute to the result of the 'or'.
1579 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1580 return TLO.CombineTo(Op, Op0);
1581 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1582 return TLO.CombineTo(Op, Op1);
1583 // If the RHS is a constant, see if we can simplify it.
1584 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1585 return true;
1586 // If the operation can be done in a smaller type, do so.
1588 return true;
1589
1590 // Attempt to avoid multi-use ops if we don't need anything from them.
1591 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1593 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1595 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1596 if (DemandedOp0 || DemandedOp1) {
1597 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1598 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1599 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1600 return TLO.CombineTo(Op, NewOp);
1601 }
1602 }
1603
1604 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1605 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1606 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1607 Op0->hasOneUse() && Op1->hasOneUse()) {
1608 // Attempt to match all commutations - m_c_Or would've been useful!
1609 for (int I = 0; I != 2; ++I) {
1610 SDValue X = Op.getOperand(I).getOperand(0);
1611 SDValue C1 = Op.getOperand(I).getOperand(1);
1612 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1613 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1614 if (Alt.getOpcode() == ISD::OR) {
1615 for (int J = 0; J != 2; ++J) {
1616 if (X == Alt.getOperand(J)) {
1617 SDValue Y = Alt.getOperand(1 - J);
1618 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1619 {C1, C2})) {
1620 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1621 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1622 return TLO.CombineTo(
1623 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1624 }
1625 }
1626 }
1627 }
1628 }
1629 }
1630
1631 Known |= Known2;
1632 break;
1633 }
1634 case ISD::XOR: {
1635 SDValue Op0 = Op.getOperand(0);
1636 SDValue Op1 = Op.getOperand(1);
1637
1638 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1639 Depth + 1))
1640 return true;
1641 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1642 Depth + 1))
1643 return true;
1644
1645 // If all of the demanded bits are known zero on one side, return the other.
1646 // These bits cannot contribute to the result of the 'xor'.
1647 if (DemandedBits.isSubsetOf(Known.Zero))
1648 return TLO.CombineTo(Op, Op0);
1649 if (DemandedBits.isSubsetOf(Known2.Zero))
1650 return TLO.CombineTo(Op, Op1);
1651 // If the operation can be done in a smaller type, do so.
1653 return true;
1654
1655 // If all of the unknown bits are known to be zero on one side or the other
1656 // turn this into an *inclusive* or.
1657 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1658 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1659 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1660
1661 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1662 if (C) {
1663 // If one side is a constant, and all of the set bits in the constant are
1664 // also known set on the other side, turn this into an AND, as we know
1665 // the bits will be cleared.
1666 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1667 // NB: it is okay if more bits are known than are requested
1668 if (C->getAPIntValue() == Known2.One) {
1669 SDValue ANDC =
1670 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1671 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1672 }
1673
1674 // If the RHS is a constant, see if we can change it. Don't alter a -1
1675 // constant because that's a 'not' op, and that is better for combining
1676 // and codegen.
1677 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1678 // We're flipping all demanded bits. Flip the undemanded bits too.
1679 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1680 return TLO.CombineTo(Op, New);
1681 }
1682
1683 unsigned Op0Opcode = Op0.getOpcode();
1684 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1685 if (ConstantSDNode *ShiftC =
1686 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1687 // Don't crash on an oversized shift. We can not guarantee that a
1688 // bogus shift has been simplified to undef.
1689 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1690 uint64_t ShiftAmt = ShiftC->getZExtValue();
1692 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1693 : Ones.lshr(ShiftAmt);
1694 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1696 // If the xor constant is a demanded mask, do a 'not' before the
1697 // shift:
1698 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1699 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1700 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1701 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1702 Op0.getOperand(1)));
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 // If we can't turn this into a 'not', try to shrink the constant.
1710 if (!C || !C->isAllOnes())
1711 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1712 return true;
1713
1714 // Attempt to avoid multi-use ops if we don't need anything from them.
1715 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1717 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1719 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1720 if (DemandedOp0 || DemandedOp1) {
1721 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1722 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1723 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1724 return TLO.CombineTo(Op, NewOp);
1725 }
1726 }
1727
1728 Known ^= Known2;
1729 break;
1730 }
1731 case ISD::SELECT:
1732 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1733 Known, TLO, Depth + 1))
1734 return true;
1735 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1736 Known2, TLO, Depth + 1))
1737 return true;
1738
1739 // If the operands are constants, see if we can simplify them.
1740 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1741 return true;
1742
1743 // Only known if known in both the LHS and RHS.
1744 Known = Known.intersectWith(Known2);
1745 break;
1746 case ISD::VSELECT:
1747 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1748 Known, TLO, Depth + 1))
1749 return true;
1750 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1751 Known2, TLO, Depth + 1))
1752 return true;
1753
1754 // Only known if known in both the LHS and RHS.
1755 Known = Known.intersectWith(Known2);
1756 break;
1757 case ISD::SELECT_CC:
1758 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1759 Known, TLO, Depth + 1))
1760 return true;
1761 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1762 Known2, TLO, Depth + 1))
1763 return true;
1764
1765 // If the operands are constants, see if we can simplify them.
1766 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1767 return true;
1768
1769 // Only known if known in both the LHS and RHS.
1770 Known = Known.intersectWith(Known2);
1771 break;
1772 case ISD::SETCC: {
1773 SDValue Op0 = Op.getOperand(0);
1774 SDValue Op1 = Op.getOperand(1);
1775 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1776 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1777 // (X is of integer type) then we only need the sign mask of the previous
1778 // result
1779 if (Op1.getValueType().isInteger() &&
1780 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1781 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1782 isAllOnesOrAllOnesSplat(Op1)))) {
1783 KnownBits KnownOp0;
1786 DemandedElts, KnownOp0, TLO, Depth + 1))
1787 return true;
1788 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1789 // width as the setcc result, and (3) the result of a setcc conforms to 0
1790 // or -1, we may be able to bypass the setcc.
1791 if (DemandedBits.isSignMask() &&
1795 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1796 // NOT Operation
1797 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1798 SDLoc DL(Op);
1799 EVT VT = Op0.getValueType();
1800 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1801 return TLO.CombineTo(Op, NotOp0);
1802 }
1803 return TLO.CombineTo(Op, Op0);
1804 }
1805 }
1806 if (getBooleanContents(Op0.getValueType()) ==
1808 BitWidth > 1)
1809 Known.Zero.setBitsFrom(1);
1810 break;
1811 }
1812 case ISD::SHL: {
1813 SDValue Op0 = Op.getOperand(0);
1814 SDValue Op1 = Op.getOperand(1);
1815 EVT ShiftVT = Op1.getValueType();
1816
1817 if (std::optional<unsigned> KnownSA =
1818 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1819 unsigned ShAmt = *KnownSA;
1820 if (ShAmt == 0)
1821 return TLO.CombineTo(Op, Op0);
1822
1823 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1824 // single shift. We can do this if the bottom bits (which are shifted
1825 // out) are never demanded.
1826 // TODO - support non-uniform vector amounts.
1827 if (Op0.getOpcode() == ISD::SRL) {
1828 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1829 if (std::optional<unsigned> InnerSA =
1830 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1831 unsigned C1 = *InnerSA;
1832 unsigned Opc = ISD::SHL;
1833 int Diff = ShAmt - C1;
1834 if (Diff < 0) {
1835 Diff = -Diff;
1836 Opc = ISD::SRL;
1837 }
1838 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1839 return TLO.CombineTo(
1840 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1841 }
1842 }
1843 }
1844
1845 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1846 // are not demanded. This will likely allow the anyext to be folded away.
1847 // TODO - support non-uniform vector amounts.
1848 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1849 SDValue InnerOp = Op0.getOperand(0);
1850 EVT InnerVT = InnerOp.getValueType();
1851 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1852 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1853 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1854 SDValue NarrowShl = TLO.DAG.getNode(
1855 ISD::SHL, dl, InnerVT, InnerOp,
1856 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1857 return TLO.CombineTo(
1858 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1859 }
1860
1861 // Repeat the SHL optimization above in cases where an extension
1862 // intervenes: (shl (anyext (shr x, c1)), c2) to
1863 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1864 // aren't demanded (as above) and that the shifted upper c1 bits of
1865 // x aren't demanded.
1866 // TODO - support non-uniform vector amounts.
1867 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1868 InnerOp.hasOneUse()) {
1869 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1870 InnerOp, DemandedElts, Depth + 2)) {
1871 unsigned InnerShAmt = *SA2;
1872 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1873 DemandedBits.getActiveBits() <=
1874 (InnerBits - InnerShAmt + ShAmt) &&
1875 DemandedBits.countr_zero() >= ShAmt) {
1876 SDValue NewSA =
1877 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1878 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1879 InnerOp.getOperand(0));
1880 return TLO.CombineTo(
1881 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1882 }
1883 }
1884 }
1885 }
1886
1887 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1888 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1889 Depth + 1)) {
1890 // Disable the nsw and nuw flags. We can no longer guarantee that we
1891 // won't wrap after simplification.
1892 Op->dropFlags(SDNodeFlags::NoWrap);
1893 return true;
1894 }
1895 Known <<= ShAmt;
1896 // low bits known zero.
1897 Known.Zero.setLowBits(ShAmt);
1898
1899 // Attempt to avoid multi-use ops if we don't need anything from them.
1900 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1902 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1903 if (DemandedOp0) {
1904 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1905 return TLO.CombineTo(Op, NewOp);
1906 }
1907 }
1908
1909 // TODO: Can we merge this fold with the one below?
1910 // Try shrinking the operation as long as the shift amount will still be
1911 // in range.
1912 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1913 Op.getNode()->hasOneUse()) {
1914 // Search for the smallest integer type with free casts to and from
1915 // Op's type. For expedience, just check power-of-2 integer types.
1916 unsigned DemandedSize = DemandedBits.getActiveBits();
1917 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1918 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1919 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1920 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1921 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1922 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1923 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1924 assert(DemandedSize <= SmallVTBits &&
1925 "Narrowed below demanded bits?");
1926 // We found a type with free casts.
1927 SDValue NarrowShl = TLO.DAG.getNode(
1928 ISD::SHL, dl, SmallVT,
1929 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1930 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1931 return TLO.CombineTo(
1932 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1933 }
1934 }
1935 }
1936
1937 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1938 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1939 // Only do this if we demand the upper half so the knownbits are correct.
1940 unsigned HalfWidth = BitWidth / 2;
1941 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1942 DemandedBits.countLeadingOnes() >= HalfWidth) {
1943 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1944 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1945 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1946 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1947 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1948 // If we're demanding the upper bits at all, we must ensure
1949 // that the upper bits of the shift result are known to be zero,
1950 // which is equivalent to the narrow shift being NUW.
1951 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1952 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1953 SDNodeFlags Flags;
1954 Flags.setNoSignedWrap(IsNSW);
1955 Flags.setNoUnsignedWrap(IsNUW);
1956 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1957 SDValue NewShiftAmt =
1958 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1959 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1960 NewShiftAmt, Flags);
1961 SDValue NewExt =
1962 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1963 return TLO.CombineTo(Op, NewExt);
1964 }
1965 }
1966 }
1967 } else {
1968 // This is a variable shift, so we can't shift the demand mask by a known
1969 // amount. But if we are not demanding high bits, then we are not
1970 // demanding those bits from the pre-shifted operand either.
1971 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1972 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1973 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1974 Depth + 1)) {
1975 // Disable the nsw and nuw flags. We can no longer guarantee that we
1976 // won't wrap after simplification.
1977 Op->dropFlags(SDNodeFlags::NoWrap);
1978 return true;
1979 }
1980 Known.resetAll();
1981 }
1982 }
1983
1984 // If we are only demanding sign bits then we can use the shift source
1985 // directly.
1986 if (std::optional<unsigned> MaxSA =
1987 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1988 unsigned ShAmt = *MaxSA;
1989 unsigned NumSignBits =
1990 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1991 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1992 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1993 return TLO.CombineTo(Op, Op0);
1994 }
1995 break;
1996 }
1997 case ISD::SRL: {
1998 SDValue Op0 = Op.getOperand(0);
1999 SDValue Op1 = Op.getOperand(1);
2000 EVT ShiftVT = Op1.getValueType();
2001
2002 if (std::optional<unsigned> KnownSA =
2003 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2004 unsigned ShAmt = *KnownSA;
2005 if (ShAmt == 0)
2006 return TLO.CombineTo(Op, Op0);
2007
2008 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2009 // single shift. We can do this if the top bits (which are shifted out)
2010 // are never demanded.
2011 // TODO - support non-uniform vector amounts.
2012 if (Op0.getOpcode() == ISD::SHL) {
2013 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2014 if (std::optional<unsigned> InnerSA =
2015 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2016 unsigned C1 = *InnerSA;
2017 unsigned Opc = ISD::SRL;
2018 int Diff = ShAmt - C1;
2019 if (Diff < 0) {
2020 Diff = -Diff;
2021 Opc = ISD::SHL;
2022 }
2023 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2024 return TLO.CombineTo(
2025 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2026 }
2027 }
2028 }
2029
2030 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2031 // single sra. We can do this if the top bits are never demanded.
2032 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2033 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2034 if (std::optional<unsigned> InnerSA =
2035 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2036 unsigned C1 = *InnerSA;
2037 // Clamp the combined shift amount if it exceeds the bit width.
2038 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2039 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2040 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2041 Op0.getOperand(0), NewSA));
2042 }
2043 }
2044 }
2045
2046 APInt InDemandedMask = (DemandedBits << ShAmt);
2047
2048 // If the shift is exact, then it does demand the low bits (and knows that
2049 // they are zero).
2050 if (Op->getFlags().hasExact())
2051 InDemandedMask.setLowBits(ShAmt);
2052
2053 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2054 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2055 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2057 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2058 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2059 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2060 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2061 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2062 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2063 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2064 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2065 SDValue NewShiftAmt =
2066 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2067 SDValue NewShift =
2068 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2069 return TLO.CombineTo(
2070 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2071 }
2072 }
2073
2074 // Compute the new bits that are at the top now.
2075 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2076 Depth + 1))
2077 return true;
2078 Known >>= ShAmt;
2079 // High bits known zero.
2080 Known.Zero.setHighBits(ShAmt);
2081
2082 // Attempt to avoid multi-use ops if we don't need anything from them.
2083 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2085 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2086 if (DemandedOp0) {
2087 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2088 return TLO.CombineTo(Op, NewOp);
2089 }
2090 }
2091 } else {
2092 // Use generic knownbits computation as it has support for non-uniform
2093 // shift amounts.
2094 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2095 }
2096
2097 // If we are only demanding sign bits then we can use the shift source
2098 // directly.
2099 if (std::optional<unsigned> MaxSA =
2100 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2101 unsigned ShAmt = *MaxSA;
2102 // Must already be signbits in DemandedBits bounds, and can't demand any
2103 // shifted in zeroes.
2104 if (DemandedBits.countl_zero() >= ShAmt) {
2105 unsigned NumSignBits =
2106 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2107 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2108 return TLO.CombineTo(Op, Op0);
2109 }
2110 }
2111
2112 // Try to match AVG patterns (after shift simplification).
2113 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2114 DemandedElts, Depth + 1))
2115 return TLO.CombineTo(Op, AVG);
2116
2117 break;
2118 }
2119 case ISD::SRA: {
2120 SDValue Op0 = Op.getOperand(0);
2121 SDValue Op1 = Op.getOperand(1);
2122 EVT ShiftVT = Op1.getValueType();
2123
2124 // If we only want bits that already match the signbit then we don't need
2125 // to shift.
2126 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2127 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2128 NumHiDemandedBits)
2129 return TLO.CombineTo(Op, Op0);
2130
2131 // If this is an arithmetic shift right and only the low-bit is set, we can
2132 // always convert this into a logical shr, even if the shift amount is
2133 // variable. The low bit of the shift cannot be an input sign bit unless
2134 // the shift amount is >= the size of the datatype, which is undefined.
2135 if (DemandedBits.isOne())
2136 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2137
2138 if (std::optional<unsigned> KnownSA =
2139 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2140 unsigned ShAmt = *KnownSA;
2141 if (ShAmt == 0)
2142 return TLO.CombineTo(Op, Op0);
2143
2144 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2145 // supports sext_inreg.
2146 if (Op0.getOpcode() == ISD::SHL) {
2147 if (std::optional<unsigned> InnerSA =
2148 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2149 unsigned LowBits = BitWidth - ShAmt;
2150 EVT ExtVT = VT.changeElementType(
2151 *TLO.DAG.getContext(),
2152 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2153
2154 if (*InnerSA == ShAmt) {
2155 if (!TLO.LegalOperations() ||
2157 return TLO.CombineTo(
2158 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2159 Op0.getOperand(0),
2160 TLO.DAG.getValueType(ExtVT)));
2161
2162 // Even if we can't convert to sext_inreg, we might be able to
2163 // remove this shift pair if the input is already sign extended.
2164 unsigned NumSignBits =
2165 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2166 if (NumSignBits > ShAmt)
2167 return TLO.CombineTo(Op, Op0.getOperand(0));
2168 }
2169 }
2170 }
2171
2172 APInt InDemandedMask = (DemandedBits << ShAmt);
2173
2174 // If the shift is exact, then it does demand the low bits (and knows that
2175 // they are zero).
2176 if (Op->getFlags().hasExact())
2177 InDemandedMask.setLowBits(ShAmt);
2178
2179 // If any of the demanded bits are produced by the sign extension, we also
2180 // demand the input sign bit.
2181 if (DemandedBits.countl_zero() < ShAmt)
2182 InDemandedMask.setSignBit();
2183
2184 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2185 Depth + 1))
2186 return true;
2187 Known >>= ShAmt;
2188
2189 // If the input sign bit is known to be zero, or if none of the top bits
2190 // are demanded, turn this into an unsigned shift right.
2191 if (Known.Zero[BitWidth - ShAmt - 1] ||
2192 DemandedBits.countl_zero() >= ShAmt) {
2193 SDNodeFlags Flags;
2194 Flags.setExact(Op->getFlags().hasExact());
2195 return TLO.CombineTo(
2196 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2197 }
2198
2199 int Log2 = DemandedBits.exactLogBase2();
2200 if (Log2 >= 0) {
2201 // The bit must come from the sign.
2202 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2203 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2204 }
2205
2206 if (Known.One[BitWidth - ShAmt - 1])
2207 // New bits are known one.
2208 Known.One.setHighBits(ShAmt);
2209
2210 // Attempt to avoid multi-use ops if we don't need anything from them.
2211 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2213 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2214 if (DemandedOp0) {
2215 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2216 return TLO.CombineTo(Op, NewOp);
2217 }
2218 }
2219 }
2220
2221 // Try to match AVG patterns (after shift simplification).
2222 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2223 DemandedElts, Depth + 1))
2224 return TLO.CombineTo(Op, AVG);
2225
2226 break;
2227 }
2228 case ISD::FSHL:
2229 case ISD::FSHR: {
2230 SDValue Op0 = Op.getOperand(0);
2231 SDValue Op1 = Op.getOperand(1);
2232 SDValue Op2 = Op.getOperand(2);
2233 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2234
2235 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2236 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2237
2238 // For fshl, 0-shift returns the 1st arg.
2239 // For fshr, 0-shift returns the 2nd arg.
2240 if (Amt == 0) {
2241 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2242 Known, TLO, Depth + 1))
2243 return true;
2244 break;
2245 }
2246
2247 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2248 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2249 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2250 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2251 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2252 Depth + 1))
2253 return true;
2254 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2255 Depth + 1))
2256 return true;
2257
2258 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2259 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2260 Known = Known.unionWith(Known2);
2261
2262 // Attempt to avoid multi-use ops if we don't need anything from them.
2263 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2264 !DemandedElts.isAllOnes()) {
2266 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2268 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2269 if (DemandedOp0 || DemandedOp1) {
2270 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2271 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2272 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2273 DemandedOp1, Op2);
2274 return TLO.CombineTo(Op, NewOp);
2275 }
2276 }
2277 }
2278
2279 if (isPowerOf2_32(BitWidth)) {
2280 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2281 // iff we're guaranteed not to use Op0.
2282 // TODO: Add FSHL equivalent?
2283 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2284 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2285 KnownBits KnownAmt =
2286 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2287 unsigned MaxShiftAmt =
2288 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2289 // Check we don't demand any shifted bits outside Op1.
2290 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2291 EVT AmtVT = Op2.getValueType();
2292 SDValue NewAmt =
2293 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2294 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2295 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2296 return TLO.CombineTo(Op, NewOp);
2297 }
2298 }
2299
2300 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2301 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2302 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2303 Depth + 1))
2304 return true;
2305 }
2306 break;
2307 }
2308 case ISD::ROTL:
2309 case ISD::ROTR: {
2310 SDValue Op0 = Op.getOperand(0);
2311 SDValue Op1 = Op.getOperand(1);
2312 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2313
2314 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2315 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2316 return TLO.CombineTo(Op, Op0);
2317
2318 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2319 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2320 unsigned RevAmt = BitWidth - Amt;
2321
2322 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2323 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2324 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2325 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2326 Depth + 1))
2327 return true;
2328
2329 // rot*(x, 0) --> x
2330 if (Amt == 0)
2331 return TLO.CombineTo(Op, Op0);
2332
2333 // See if we don't demand either half of the rotated bits.
2334 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2335 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2336 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2337 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2338 }
2339 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2340 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2341 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2343 }
2344 }
2345
2346 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2347 if (isPowerOf2_32(BitWidth)) {
2348 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2349 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2350 Depth + 1))
2351 return true;
2352 }
2353 break;
2354 }
2355 case ISD::SMIN:
2356 case ISD::SMAX:
2357 case ISD::UMIN:
2358 case ISD::UMAX: {
2359 unsigned Opc = Op.getOpcode();
2360 SDValue Op0 = Op.getOperand(0);
2361 SDValue Op1 = Op.getOperand(1);
2362
2363 // If we're only demanding signbits, then we can simplify to OR/AND node.
2364 unsigned BitOp =
2365 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2366 unsigned NumSignBits =
2367 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2368 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2369 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2370 if (NumSignBits >= NumDemandedUpperBits)
2371 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2372
2373 // Check if one arg is always less/greater than (or equal) to the other arg.
2374 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2375 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2376 switch (Opc) {
2377 case ISD::SMIN:
2378 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2379 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2380 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2382 Known = KnownBits::smin(Known0, Known1);
2383 break;
2384 case ISD::SMAX:
2385 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2386 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2387 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2389 Known = KnownBits::smax(Known0, Known1);
2390 break;
2391 case ISD::UMIN:
2392 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2393 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2394 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2395 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2396 Known = KnownBits::umin(Known0, Known1);
2397 break;
2398 case ISD::UMAX:
2399 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2400 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2401 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2402 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2403 Known = KnownBits::umax(Known0, Known1);
2404 break;
2405 }
2406 break;
2407 }
2408 case ISD::BITREVERSE: {
2409 SDValue Src = Op.getOperand(0);
2410 APInt DemandedSrcBits = DemandedBits.reverseBits();
2411 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2412 Depth + 1))
2413 return true;
2414 Known = Known2.reverseBits();
2415 break;
2416 }
2417 case ISD::BSWAP: {
2418 SDValue Src = Op.getOperand(0);
2419
2420 // If the only bits demanded come from one byte of the bswap result,
2421 // just shift the input byte into position to eliminate the bswap.
2422 unsigned NLZ = DemandedBits.countl_zero();
2423 unsigned NTZ = DemandedBits.countr_zero();
2424
2425 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2426 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2427 // have 14 leading zeros, round to 8.
2428 NLZ = alignDown(NLZ, 8);
2429 NTZ = alignDown(NTZ, 8);
2430 // If we need exactly one byte, we can do this transformation.
2431 if (BitWidth - NLZ - NTZ == 8) {
2432 // Replace this with either a left or right shift to get the byte into
2433 // the right place.
2434 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2435 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2436 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2437 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2438 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2439 return TLO.CombineTo(Op, NewOp);
2440 }
2441 }
2442
2443 APInt DemandedSrcBits = DemandedBits.byteSwap();
2444 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2445 Depth + 1))
2446 return true;
2447 Known = Known2.byteSwap();
2448 break;
2449 }
2450 case ISD::CTPOP: {
2451 // If only 1 bit is demanded, replace with PARITY as long as we're before
2452 // op legalization.
2453 // FIXME: Limit to scalars for now.
2454 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2455 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2456 Op.getOperand(0)));
2457
2458 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2459 break;
2460 }
2462 SDValue Op0 = Op.getOperand(0);
2463 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2464 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2465
2466 // If we only care about the highest bit, don't bother shifting right.
2467 if (DemandedBits.isSignMask()) {
2468 unsigned MinSignedBits =
2469 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2470 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2471 // However if the input is already sign extended we expect the sign
2472 // extension to be dropped altogether later and do not simplify.
2473 if (!AlreadySignExtended) {
2474 // Compute the correct shift amount type, which must be getShiftAmountTy
2475 // for scalar types after legalization.
2476 SDValue ShiftAmt =
2477 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2478 return TLO.CombineTo(Op,
2479 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2480 }
2481 }
2482
2483 // If none of the extended bits are demanded, eliminate the sextinreg.
2484 if (DemandedBits.getActiveBits() <= ExVTBits)
2485 return TLO.CombineTo(Op, Op0);
2486
2487 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2488
2489 // Since the sign extended bits are demanded, we know that the sign
2490 // bit is demanded.
2491 InputDemandedBits.setBit(ExVTBits - 1);
2492
2493 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2494 Depth + 1))
2495 return true;
2496
2497 // If the sign bit of the input is known set or clear, then we know the
2498 // top bits of the result.
2499
2500 // If the input sign bit is known zero, convert this into a zero extension.
2501 if (Known.Zero[ExVTBits - 1])
2502 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2503
2504 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2505 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2506 Known.One.setBitsFrom(ExVTBits);
2507 Known.Zero &= Mask;
2508 } else { // Input sign bit unknown
2509 Known.Zero &= Mask;
2510 Known.One &= Mask;
2511 }
2512 break;
2513 }
2514 case ISD::BUILD_PAIR: {
2515 EVT HalfVT = Op.getOperand(0).getValueType();
2516 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2517
2518 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2519 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2520
2521 KnownBits KnownLo, KnownHi;
2522
2523 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2524 return true;
2525
2526 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2527 return true;
2528
2529 Known = KnownHi.concat(KnownLo);
2530 break;
2531 }
2533 if (VT.isScalableVector())
2534 return false;
2535 [[fallthrough]];
2536 case ISD::ZERO_EXTEND: {
2537 SDValue Src = Op.getOperand(0);
2538 EVT SrcVT = Src.getValueType();
2539 unsigned InBits = SrcVT.getScalarSizeInBits();
2540 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2541 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2542
2543 // If none of the top bits are demanded, convert this into an any_extend.
2544 if (DemandedBits.getActiveBits() <= InBits) {
2545 // If we only need the non-extended bits of the bottom element
2546 // then we can just bitcast to the result.
2547 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2548 VT.getSizeInBits() == SrcVT.getSizeInBits())
2549 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2550
2551 unsigned Opc =
2553 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2554 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2555 }
2556
2557 APInt InDemandedBits = DemandedBits.trunc(InBits);
2558 APInt InDemandedElts = DemandedElts.zext(InElts);
2559 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2560 Depth + 1)) {
2561 Op->dropFlags(SDNodeFlags::NonNeg);
2562 return true;
2563 }
2564 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2565 Known = Known.zext(BitWidth);
2566
2567 // Attempt to avoid multi-use ops if we don't need anything from them.
2569 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2570 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2571 break;
2572 }
2574 if (VT.isScalableVector())
2575 return false;
2576 [[fallthrough]];
2577 case ISD::SIGN_EXTEND: {
2578 SDValue Src = Op.getOperand(0);
2579 EVT SrcVT = Src.getValueType();
2580 unsigned InBits = SrcVT.getScalarSizeInBits();
2581 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2582 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2583
2584 APInt InDemandedElts = DemandedElts.zext(InElts);
2585 APInt InDemandedBits = DemandedBits.trunc(InBits);
2586
2587 // Since some of the sign extended bits are demanded, we know that the sign
2588 // bit is demanded.
2589 InDemandedBits.setBit(InBits - 1);
2590
2591 // If none of the top bits are demanded, convert this into an any_extend.
2592 if (DemandedBits.getActiveBits() <= InBits) {
2593 // If we only need the non-extended bits of the bottom element
2594 // then we can just bitcast to the result.
2595 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2596 VT.getSizeInBits() == SrcVT.getSizeInBits())
2597 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2598
2599 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2601 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2602 InBits) {
2603 unsigned Opc =
2605 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2606 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2607 }
2608 }
2609
2610 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2611 Depth + 1))
2612 return true;
2613 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2614
2615 // If the sign bit is known one, the top bits match.
2616 Known = Known.sext(BitWidth);
2617
2618 // If the sign bit is known zero, convert this to a zero extend.
2619 if (Known.isNonNegative()) {
2620 unsigned Opc =
2622 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2623 SDNodeFlags Flags;
2624 if (!IsVecInReg)
2625 Flags |= SDNodeFlags::NonNeg;
2626 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2627 }
2628 }
2629
2630 // Attempt to avoid multi-use ops if we don't need anything from them.
2632 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2633 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2634 break;
2635 }
2637 if (VT.isScalableVector())
2638 return false;
2639 [[fallthrough]];
2640 case ISD::ANY_EXTEND: {
2641 SDValue Src = Op.getOperand(0);
2642 EVT SrcVT = Src.getValueType();
2643 unsigned InBits = SrcVT.getScalarSizeInBits();
2644 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2645 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2646
2647 // If we only need the bottom element then we can just bitcast.
2648 // TODO: Handle ANY_EXTEND?
2649 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2650 VT.getSizeInBits() == SrcVT.getSizeInBits())
2651 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2652
2653 APInt InDemandedBits = DemandedBits.trunc(InBits);
2654 APInt InDemandedElts = DemandedElts.zext(InElts);
2655 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2656 Depth + 1))
2657 return true;
2658 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2659 Known = Known.anyext(BitWidth);
2660
2661 // Attempt to avoid multi-use ops if we don't need anything from them.
2663 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2664 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2665 break;
2666 }
2667 case ISD::TRUNCATE: {
2668 SDValue Src = Op.getOperand(0);
2669
2670 // Simplify the input, using demanded bit information, and compute the known
2671 // zero/one bits live out.
2672 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2673 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2674 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2675 Depth + 1)) {
2676 // Disable the nsw and nuw flags. We can no longer guarantee that we
2677 // won't wrap after simplification.
2678 Op->dropFlags(SDNodeFlags::NoWrap);
2679 return true;
2680 }
2681 Known = Known.trunc(BitWidth);
2682
2683 // Attempt to avoid multi-use ops if we don't need anything from them.
2685 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2686 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2687
2688 // If the input is only used by this truncate, see if we can shrink it based
2689 // on the known demanded bits.
2690 switch (Src.getOpcode()) {
2691 default:
2692 break;
2693 case ISD::SRL:
2694 // Shrink SRL by a constant if none of the high bits shifted in are
2695 // demanded.
2696 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2697 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2698 // undesirable.
2699 break;
2700
2701 if (Src.getNode()->hasOneUse()) {
2702 if (isTruncateFree(Src, VT) &&
2703 !isTruncateFree(Src.getValueType(), VT)) {
2704 // If truncate is only free at trunc(srl), do not turn it into
2705 // srl(trunc). The check is done by first checking that the truncate is
2706 // free at Src's opcode (srl), then checking that the truncate is not done
2707 // by referencing a sub-register. In testing, if both trunc(srl) and
2708 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2709 // trunc(srl)'s trunc is free, trunc(srl) is better.
2710 break;
2711 }
2712
2713 std::optional<unsigned> ShAmtC =
2714 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2715 if (!ShAmtC || *ShAmtC >= BitWidth)
2716 break;
2717 unsigned ShVal = *ShAmtC;
2718
2719 APInt HighBits =
2720 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2721 HighBits.lshrInPlace(ShVal);
2722 HighBits = HighBits.trunc(BitWidth);
2723 if (!(HighBits & DemandedBits)) {
2724 // None of the shifted in bits are needed. Add a truncate of the
2725 // shift input, then shift it.
2726 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2727 SDValue NewTrunc =
2728 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2729 return TLO.CombineTo(
2730 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2731 }
2732 }
2733 break;
2734 }
2735
2736 break;
2737 }
2738 case ISD::AssertZext: {
2739 // AssertZext demands all of the high bits, plus any of the low bits
2740 // demanded by its users.
2741 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2743 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2744 TLO, Depth + 1))
2745 return true;
2746
2747 Known.Zero |= ~InMask;
2748 Known.One &= (~Known.Zero);
2749 break;
2750 }
2752 SDValue Src = Op.getOperand(0);
2753 SDValue Idx = Op.getOperand(1);
2754 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2755 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2756
2757 if (SrcEltCnt.isScalable())
2758 return false;
2759
2760 // Demand the bits from every vector element without a constant index.
2761 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2762 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2763 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2764 if (CIdx->getAPIntValue().ult(NumSrcElts))
2765 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2766
2767 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2768 // anything about the extended bits.
2769 APInt DemandedSrcBits = DemandedBits;
2770 if (BitWidth > EltBitWidth)
2771 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2772
2773 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2774 Depth + 1))
2775 return true;
2776
2777 // Attempt to avoid multi-use ops if we don't need anything from them.
2778 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2779 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2780 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2781 SDValue NewOp =
2782 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2783 return TLO.CombineTo(Op, NewOp);
2784 }
2785 }
2786
2787 Known = Known2;
2788 if (BitWidth > EltBitWidth)
2789 Known = Known.anyext(BitWidth);
2790 break;
2791 }
2792 case ISD::BITCAST: {
2793 if (VT.isScalableVector())
2794 return false;
2795 SDValue Src = Op.getOperand(0);
2796 EVT SrcVT = Src.getValueType();
2797 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2798
2799 // If this is an FP->Int bitcast and if the sign bit is the only
2800 // thing demanded, turn this into a FGETSIGN.
2801 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2802 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2803 SrcVT.isFloatingPoint()) {
2805 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2806 // place. We expect the SHL to be eliminated by other optimizations.
2807 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2808 unsigned ShVal = Op.getValueSizeInBits() - 1;
2809 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2810 return TLO.CombineTo(Op,
2811 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2812 }
2813 }
2814
2815 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2816 // Demand the elt/bit if any of the original elts/bits are demanded.
2817 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2818 unsigned Scale = BitWidth / NumSrcEltBits;
2819 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2820 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2821 for (unsigned i = 0; i != Scale; ++i) {
2822 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2823 unsigned BitOffset = EltOffset * NumSrcEltBits;
2824 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2825 }
2826 // Recursive calls below may turn not demanded elements into poison, so we
2827 // need to demand all smaller source elements that maps to a demanded
2828 // destination element.
2829 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2830
2831 APInt KnownSrcUndef, KnownSrcZero;
2832 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2833 KnownSrcZero, TLO, Depth + 1))
2834 return true;
2835
2836 KnownBits KnownSrcBits;
2837 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2838 KnownSrcBits, TLO, Depth + 1))
2839 return true;
2840 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2841 // TODO - bigendian once we have test coverage.
2842 unsigned Scale = NumSrcEltBits / BitWidth;
2843 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2844 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2845 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2846 for (unsigned i = 0; i != NumElts; ++i)
2847 if (DemandedElts[i]) {
2848 unsigned Offset = (i % Scale) * BitWidth;
2849 DemandedSrcBits.insertBits(DemandedBits, Offset);
2850 DemandedSrcElts.setBit(i / Scale);
2851 }
2852
2853 if (SrcVT.isVector()) {
2854 APInt KnownSrcUndef, KnownSrcZero;
2855 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2856 KnownSrcZero, TLO, Depth + 1))
2857 return true;
2858 }
2859
2860 KnownBits KnownSrcBits;
2861 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2862 KnownSrcBits, TLO, Depth + 1))
2863 return true;
2864
2865 // Attempt to avoid multi-use ops if we don't need anything from them.
2866 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2867 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2868 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2869 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2870 return TLO.CombineTo(Op, NewOp);
2871 }
2872 }
2873 }
2874
2875 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2876 // recursive call where Known may be useful to the caller.
2877 if (Depth > 0) {
2878 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2879 return false;
2880 }
2881 break;
2882 }
2883 case ISD::MUL:
2884 if (DemandedBits.isPowerOf2()) {
2885 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2886 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2887 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2888 unsigned CTZ = DemandedBits.countr_zero();
2889 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2890 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2891 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2892 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2893 return TLO.CombineTo(Op, Shl);
2894 }
2895 }
2896 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2897 // X * X is odd iff X is odd.
2898 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2899 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2900 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2901 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2902 return TLO.CombineTo(Op, And1);
2903 }
2904 [[fallthrough]];
2905 case ISD::PTRADD:
2906 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2907 break;
2908 // PTRADD behaves like ADD if pointers are represented as integers.
2909 [[fallthrough]];
2910 case ISD::ADD:
2911 case ISD::SUB: {
2912 // Add, Sub, and Mul don't demand any bits in positions beyond that
2913 // of the highest bit demanded of them.
2914 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2915 SDNodeFlags Flags = Op.getNode()->getFlags();
2916 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2917 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2918 KnownBits KnownOp0, KnownOp1;
2919 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2920 const KnownBits &KnownRHS) {
2921 if (Op.getOpcode() == ISD::MUL)
2922 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2923 return Demanded;
2924 };
2925 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2926 Depth + 1) ||
2927 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2928 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2929 // See if the operation should be performed at a smaller bit width.
2931 // Disable the nsw and nuw flags. We can no longer guarantee that we
2932 // won't wrap after simplification.
2933 Op->dropFlags(SDNodeFlags::NoWrap);
2934 return true;
2935 }
2936
2937 // neg x with only low bit demanded is simply x.
2938 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2939 isNullConstant(Op0))
2940 return TLO.CombineTo(Op, Op1);
2941
2942 // Attempt to avoid multi-use ops if we don't need anything from them.
2943 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2945 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2947 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2948 if (DemandedOp0 || DemandedOp1) {
2949 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2950 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2951 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2952 Flags & ~SDNodeFlags::NoWrap);
2953 return TLO.CombineTo(Op, NewOp);
2954 }
2955 }
2956
2957 // If we have a constant operand, we may be able to turn it into -1 if we
2958 // do not demand the high bits. This can make the constant smaller to
2959 // encode, allow more general folding, or match specialized instruction
2960 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2961 // is probably not useful (and could be detrimental).
2963 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2964 if (C && !C->isAllOnes() && !C->isOne() &&
2965 (C->getAPIntValue() | HighMask).isAllOnes()) {
2966 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2967 // Disable the nsw and nuw flags. We can no longer guarantee that we
2968 // won't wrap after simplification.
2969 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2970 Flags & ~SDNodeFlags::NoWrap);
2971 return TLO.CombineTo(Op, NewOp);
2972 }
2973
2974 // Match a multiply with a disguised negated-power-of-2 and convert to
2975 // an equivalent shift-left amount.
2976 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2977 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2978 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2979 return 0;
2980
2981 // Don't touch opaque constants. Also, ignore zero and power-of-2
2982 // multiplies. Those will get folded later.
2983 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2984 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2985 !MulC->getAPIntValue().isPowerOf2()) {
2986 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2987 if (UnmaskedC.isNegatedPowerOf2())
2988 return (-UnmaskedC).logBase2();
2989 }
2990 return 0;
2991 };
2992
2993 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2994 unsigned ShlAmt) {
2995 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2996 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2997 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2998 return TLO.CombineTo(Op, Res);
2999 };
3000
3002 if (Op.getOpcode() == ISD::ADD) {
3003 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3004 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3005 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
3006 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3007 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3008 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3009 }
3010 if (Op.getOpcode() == ISD::SUB) {
3011 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3012 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3013 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3014 }
3015 }
3016
3017 if (Op.getOpcode() == ISD::MUL) {
3018 Known = KnownBits::mul(KnownOp0, KnownOp1);
3019 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3021 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3022 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3023 }
3024 break;
3025 }
3026 case ISD::FABS: {
3027 SDValue Op0 = Op.getOperand(0);
3028 APInt SignMask = APInt::getSignMask(BitWidth);
3029
3030 if (!DemandedBits.intersects(SignMask))
3031 return TLO.CombineTo(Op, Op0);
3032
3033 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3034 Depth + 1))
3035 return true;
3036
3037 if (Known.isNonNegative())
3038 return TLO.CombineTo(Op, Op0);
3039 if (Known.isNegative())
3040 return TLO.CombineTo(
3041 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3042
3043 Known.Zero |= SignMask;
3044 Known.One &= ~SignMask;
3045
3046 break;
3047 }
3048 case ISD::FCOPYSIGN: {
3049 SDValue Op0 = Op.getOperand(0);
3050 SDValue Op1 = Op.getOperand(1);
3051
3052 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3053 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3054 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3055 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3056
3057 if (!DemandedBits.intersects(SignMask0))
3058 return TLO.CombineTo(Op, Op0);
3059
3060 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3061 Known, TLO, Depth + 1) ||
3062 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3063 Depth + 1))
3064 return true;
3065
3066 if (Known2.isNonNegative())
3067 return TLO.CombineTo(
3068 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3069
3070 if (Known2.isNegative())
3071 return TLO.CombineTo(
3072 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3073 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3074
3075 Known.Zero &= ~SignMask0;
3076 Known.One &= ~SignMask0;
3077 break;
3078 }
3079 case ISD::FNEG: {
3080 SDValue Op0 = Op.getOperand(0);
3081 APInt SignMask = APInt::getSignMask(BitWidth);
3082
3083 if (!DemandedBits.intersects(SignMask))
3084 return TLO.CombineTo(Op, Op0);
3085
3086 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3087 Depth + 1))
3088 return true;
3089
3090 if (!Known.isSignUnknown()) {
3091 Known.Zero ^= SignMask;
3092 Known.One ^= SignMask;
3093 }
3094
3095 break;
3096 }
3097 default:
3098 // We also ask the target about intrinsics (which could be specific to it).
3099 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3100 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3101 // TODO: Probably okay to remove after audit; here to reduce change size
3102 // in initial enablement patch for scalable vectors
3103 if (Op.getValueType().isScalableVector())
3104 break;
3106 Known, TLO, Depth))
3107 return true;
3108 break;
3109 }
3110
3111 // Just use computeKnownBits to compute output bits.
3112 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3113 break;
3114 }
3115
3116 // If we know the value of all of the demanded bits, return this as a
3117 // constant.
3119 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3120 // Avoid folding to a constant if any OpaqueConstant is involved.
3121 if (llvm::any_of(Op->ops(), [](SDValue V) {
3122 auto *C = dyn_cast<ConstantSDNode>(V);
3123 return C && C->isOpaque();
3124 }))
3125 return false;
3126 if (VT.isInteger())
3127 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3128 if (VT.isFloatingPoint())
3129 return TLO.CombineTo(
3130 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3131 dl, VT));
3132 }
3133
3134 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3135 // Try again just for the original demanded elts.
3136 // Ensure we do this AFTER constant folding above.
3137 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3138 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3139
3140 return false;
3141}
3142
3144 const APInt &DemandedElts,
3145 DAGCombinerInfo &DCI) const {
     // Convenience wrapper: runs the TargetLoweringOpt-based
     // SimplifyDemandedVectorElts on Op for the given DemandedElts mask and, if
     // anything was simplified, commits the result through the combiner info.
     // Returns whether a simplification was made.
     // NOTE(review): the first line of this signature is missing from this
     // listing; presumably it declares the SDValue Op parameter used below --
     // confirm against the original file.
3146 SelectionDAG &DAG = DCI.DAG;
     // The two flags handed to TLO are the negations of
     // DCI.isBeforeLegalize() and DCI.isBeforeLegalizeOps().
3147 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3148 !DCI.isBeforeLegalizeOps());
3149
     // KnownUndef/KnownZero are required outputs of the callee; this wrapper
     // does not read them afterwards.
3150 APInt KnownUndef, KnownZero;
3151 bool Simplified =
3152 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
     // On success, queue Op's node on the combiner worklist and commit the
     // replacement recorded in TLO.
3153 if (Simplified) {
3154 DCI.AddToWorklist(Op.getNode());
3155 DCI.CommitTargetLoweringOpt(TLO);
3156 }
3157
3158 return Simplified;
3159}
3160
3161/// Given a vector binary operation and known undefined elements for each input
3162/// operand, compute whether each element of the output is undefined.
     // Returns an APInt with one bit per element; bit i is set when element i of
     // the result is determined to be undef. UndefOp0/UndefOp1 must each have a
     // bit width equal to the element count computed below (asserted).
     // NOTE(review): the first line of this signature (which presumably declares
     // the BO and DAG names used below) and the first line of the
     // "Vector binop only" assert are missing from this listing -- confirm
     // against the original file.
3164 const APInt &UndefOp0,
3165 const APInt &UndefOp1) {
3166 EVT VT = BO.getValueType();
3168 "Vector binop only");
3169
3170 EVT EltVT = VT.getVectorElementType();
     // Any type that is not a fixed-length vector is treated as having a single
     // element here.
3171 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3172 assert(UndefOp0.getBitWidth() == NumElts &&
3173 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3174
     // Helper: produce a scalar stand-in for element Index of operand V.
     //  - If the element is flagged in UndefVals, return an undef of EltVT.
     //  - Else, if V is a BUILD_VECTOR, return its Index-th operand when that
     //    operand is an FP constant, undef, or a non-opaque integer constant.
     //  - Otherwise return an empty SDValue (element not usable for folding).
3175 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3176 const APInt &UndefVals) {
3177 if (UndefVals[Index])
3178 return DAG.getUNDEF(EltVT);
3179
3180 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3181 // Try hard to make sure that the getNode() call is not creating temporary
3182 // nodes. Ignore opaque integers because they do not constant fold.
3183 SDValue Elt = BV->getOperand(Index);
3184 auto *C = dyn_cast<ConstantSDNode>(Elt);
3185 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3186 return Elt;
3187 }
3188
3189 return SDValue();
3190 };
3191
     // Start with no elements known undef and set bits element by element.
3192 APInt KnownUndef = APInt::getZero(NumElts);
3193 for (unsigned i = 0; i != NumElts; ++i) {
3194 // If both inputs for this element are either constant or undef and match
3195 // the element type, compute the constant/undef result for this element of
3196 // the vector.
3197 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3198 // not handle FP constants. The code within getNode() should be refactored
3199 // to avoid the danger of creating a bogus temporary node here.
3200 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3201 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
     // Both stand-ins must exist and have exactly EltVT as their type; the
     // scalar node built from them is then checked for being undef.
3202 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3203 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3204 KnownUndef.setBit(i);
3205 }
3206 return KnownUndef;
3207}
3208
3210 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3211 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3212 bool AssumeSingleUse) const {
3213 EVT VT = Op.getValueType();
3214 unsigned Opcode = Op.getOpcode();
3215 APInt DemandedElts = OriginalDemandedElts;
3216 unsigned NumElts = DemandedElts.getBitWidth();
3217 assert(VT.isVector() && "Expected vector op");
3218
3219 KnownUndef = KnownZero = APInt::getZero(NumElts);
3220
3222 return false;
3223
3224 // TODO: For now we assume we know nothing about scalable vectors.
3225 if (VT.isScalableVector())
3226 return false;
3227
3228 assert(VT.getVectorNumElements() == NumElts &&
3229 "Mask size mismatches value type element count!");
3230
3231 // Undef operand.
3232 if (Op.isUndef()) {
3233 KnownUndef.setAllBits();
3234 return false;
3235 }
3236
3237 // If Op has other users, assume that all elements are needed.
3238 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3239 DemandedElts.setAllBits();
3240
3241 // Not demanding any elements from Op.
3242 if (DemandedElts == 0) {
3243 KnownUndef.setAllBits();
3244 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3245 }
3246
3247 // Limit search depth.
3249 return false;
3250
3251 SDLoc DL(Op);
3252 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3253 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3254
3255 // Helper for demanding the specified elements and all the bits of both binary
3256 // operands.
3257 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3258 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3259 TLO.DAG, Depth + 1);
3260 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3261 TLO.DAG, Depth + 1);
3262 if (NewOp0 || NewOp1) {
3263 SDValue NewOp =
3264 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3265 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3266 return TLO.CombineTo(Op, NewOp);
3267 }
3268 return false;
3269 };
3270
3271 switch (Opcode) {
3272 case ISD::SCALAR_TO_VECTOR: {
3273 if (!DemandedElts[0]) {
3274 KnownUndef.setAllBits();
3275 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3276 }
3277 KnownUndef.setHighBits(NumElts - 1);
3278 break;
3279 }
3280 case ISD::BITCAST: {
3281 SDValue Src = Op.getOperand(0);
3282 EVT SrcVT = Src.getValueType();
3283
3284 if (!SrcVT.isVector()) {
3285 // TODO - bigendian once we have test coverage.
3286 if (IsLE) {
3287 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3288 unsigned EltSize = VT.getScalarSizeInBits();
3289 for (unsigned I = 0; I != NumElts; ++I) {
3290 if (DemandedElts[I]) {
3291 unsigned Offset = I * EltSize;
3292 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3293 }
3294 }
3295 KnownBits Known;
3296 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3297 return true;
3298 }
3299 break;
3300 }
3301
3302 // Fast handling of 'identity' bitcasts.
3303 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3304 if (NumSrcElts == NumElts)
3305 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3306 KnownZero, TLO, Depth + 1);
3307
3308 APInt SrcDemandedElts, SrcZero, SrcUndef;
3309
3310 // Bitcast from 'large element' src vector to 'small element' vector, we
3311 // must demand a source element if any DemandedElt maps to it.
3312 if ((NumElts % NumSrcElts) == 0) {
3313 unsigned Scale = NumElts / NumSrcElts;
3314 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3315 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3316 TLO, Depth + 1))
3317 return true;
3318
3319 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3320 // of the large element.
3321 // TODO - bigendian once we have test coverage.
3322 if (IsLE) {
3323 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3324 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3325 for (unsigned i = 0; i != NumElts; ++i)
3326 if (DemandedElts[i]) {
3327 unsigned Ofs = (i % Scale) * EltSizeInBits;
3328 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3329 }
3330
3331 KnownBits Known;
3332 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3333 TLO, Depth + 1))
3334 return true;
3335
3336 // The bitcast has split each wide element into a number of
3337 // narrow subelements. We have just computed the Known bits
3338 // for wide elements. See if element splitting results in
3339 // some subelements being zero. Only for demanded elements!
3340 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3341 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3342 .isAllOnes())
3343 continue;
3344 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3345 unsigned Elt = Scale * SrcElt + SubElt;
3346 if (DemandedElts[Elt])
3347 KnownZero.setBit(Elt);
3348 }
3349 }
3350 }
3351
3352 // If the src element is zero/undef then all the output elements will be -
3353 // only demanded elements are guaranteed to be correct.
3354 for (unsigned i = 0; i != NumSrcElts; ++i) {
3355 if (SrcDemandedElts[i]) {
3356 if (SrcZero[i])
3357 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3358 if (SrcUndef[i])
3359 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3360 }
3361 }
3362 }
3363
3364 // Bitcast from 'small element' src vector to 'large element' vector, we
3365 // demand all smaller source elements covered by the larger demanded element
3366 // of this vector.
3367 if ((NumSrcElts % NumElts) == 0) {
3368 unsigned Scale = NumSrcElts / NumElts;
3369 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3370 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3371 TLO, Depth + 1))
3372 return true;
3373
3374 // If all the src elements covering an output element are zero/undef, then
3375 // the output element will be as well, assuming it was demanded.
3376 for (unsigned i = 0; i != NumElts; ++i) {
3377 if (DemandedElts[i]) {
3378 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3379 KnownZero.setBit(i);
3380 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3381 KnownUndef.setBit(i);
3382 }
3383 }
3384 }
3385 break;
3386 }
3387 case ISD::FREEZE: {
3388 SDValue N0 = Op.getOperand(0);
3390 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3391 return TLO.CombineTo(Op, N0);
3392
3393 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3394 // freeze(op(x, ...)) -> op(freeze(x), ...).
3395 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3396 return TLO.CombineTo(
3398 TLO.DAG.getFreeze(N0.getOperand(0))));
3399 break;
3400 }
3401 case ISD::BUILD_VECTOR: {
3402 // Check all elements and simplify any unused elements with UNDEF.
3403 if (!DemandedElts.isAllOnes()) {
3404 // Don't simplify BROADCASTS.
3405 if (llvm::any_of(Op->op_values(),
3406 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3408 bool Updated = false;
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3411 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3412 KnownUndef.setBit(i);
3413 Updated = true;
3414 }
3415 }
3416 if (Updated)
3417 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3418 }
3419 }
3420 for (unsigned i = 0; i != NumElts; ++i) {
3421 SDValue SrcOp = Op.getOperand(i);
3422 if (SrcOp.isUndef()) {
3423 KnownUndef.setBit(i);
3424 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3426 KnownZero.setBit(i);
3427 }
3428 }
3429 break;
3430 }
3431 case ISD::CONCAT_VECTORS: {
3432 EVT SubVT = Op.getOperand(0).getValueType();
3433 unsigned NumSubVecs = Op.getNumOperands();
3434 unsigned NumSubElts = SubVT.getVectorNumElements();
3435 for (unsigned i = 0; i != NumSubVecs; ++i) {
3436 SDValue SubOp = Op.getOperand(i);
3437 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3438 APInt SubUndef, SubZero;
3439 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3440 Depth + 1))
3441 return true;
3442 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3443 KnownZero.insertBits(SubZero, i * NumSubElts);
3444 }
3445
3446 // Attempt to avoid multi-use ops if we don't need anything from them.
3447 if (!DemandedElts.isAllOnes()) {
3448 bool FoundNewSub = false;
3449 SmallVector<SDValue, 2> DemandedSubOps;
3450 for (unsigned i = 0; i != NumSubVecs; ++i) {
3451 SDValue SubOp = Op.getOperand(i);
3452 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3454 SubOp, SubElts, TLO.DAG, Depth + 1);
3455 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3456 FoundNewSub = NewSubOp ? true : FoundNewSub;
3457 }
3458 if (FoundNewSub) {
3459 SDValue NewOp =
3460 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3461 return TLO.CombineTo(Op, NewOp);
3462 }
3463 }
3464 break;
3465 }
3466 case ISD::INSERT_SUBVECTOR: {
3467 // Demand any elements from the subvector and the remainder from the src it
3468 // is inserted into.
3469 SDValue Src = Op.getOperand(0);
3470 SDValue Sub = Op.getOperand(1);
3471 uint64_t Idx = Op.getConstantOperandVal(2);
3472 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3473 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3474 APInt DemandedSrcElts = DemandedElts;
3475 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3476
3477 // If none of the sub operand elements are demanded, bypass the insert.
3478 if (!DemandedSubElts)
3479 return TLO.CombineTo(Op, Src);
3480
3481 APInt SubUndef, SubZero;
3482 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3483 Depth + 1))
3484 return true;
3485
3486 // If none of the src operand elements are demanded, replace it with undef.
3487 if (!DemandedSrcElts && !Src.isUndef())
3488 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3489 TLO.DAG.getUNDEF(VT), Sub,
3490 Op.getOperand(2)));
3491
3492 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3493 TLO, Depth + 1))
3494 return true;
3495 KnownUndef.insertBits(SubUndef, Idx);
3496 KnownZero.insertBits(SubZero, Idx);
3497
3498 // Attempt to avoid multi-use ops if we don't need anything from them.
3499 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3501 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3503 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3504 if (NewSrc || NewSub) {
3505 NewSrc = NewSrc ? NewSrc : Src;
3506 NewSub = NewSub ? NewSub : Sub;
3507 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3508 NewSub, Op.getOperand(2));
3509 return TLO.CombineTo(Op, NewOp);
3510 }
3511 }
3512 break;
3513 }
3515 // Offset the demanded elts by the subvector index.
3516 SDValue Src = Op.getOperand(0);
3517 if (Src.getValueType().isScalableVector())
3518 break;
3519 uint64_t Idx = Op.getConstantOperandVal(1);
3520 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3521 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3522
3523 APInt SrcUndef, SrcZero;
3524 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3525 Depth + 1))
3526 return true;
3527 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3528 KnownZero = SrcZero.extractBits(NumElts, Idx);
3529
3530 // Attempt to avoid multi-use ops if we don't need anything from them.
3531 if (!DemandedElts.isAllOnes()) {
3533 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3534 if (NewSrc) {
3535 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3536 Op.getOperand(1));
3537 return TLO.CombineTo(Op, NewOp);
3538 }
3539 }
3540 break;
3541 }
3543 SDValue Vec = Op.getOperand(0);
3544 SDValue Scl = Op.getOperand(1);
3545 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3546
3547 // For a legal, constant insertion index, if we don't need this insertion
3548 // then strip it, else remove it from the demanded elts.
3549 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3550 unsigned Idx = CIdx->getZExtValue();
3551 if (!DemandedElts[Idx])
3552 return TLO.CombineTo(Op, Vec);
3553
3554 APInt DemandedVecElts(DemandedElts);
3555 DemandedVecElts.clearBit(Idx);
3556 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3557 KnownZero, TLO, Depth + 1))
3558 return true;
3559
3560 KnownUndef.setBitVal(Idx, Scl.isUndef());
3561
3562 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3563 break;
3564 }
3565
3566 APInt VecUndef, VecZero;
3567 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3568 Depth + 1))
3569 return true;
3570 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3571 break;
3572 }
3573 case ISD::VSELECT: {
3574 SDValue Sel = Op.getOperand(0);
3575 SDValue LHS = Op.getOperand(1);
3576 SDValue RHS = Op.getOperand(2);
3577
3578 // Try to transform the select condition based on the current demanded
3579 // elements.
3580 APInt UndefSel, ZeroSel;
3581 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3582 Depth + 1))
3583 return true;
3584
3585 // See if we can simplify either vselect operand.
3586 APInt DemandedLHS(DemandedElts);
3587 APInt DemandedRHS(DemandedElts);
3588 APInt UndefLHS, ZeroLHS;
3589 APInt UndefRHS, ZeroRHS;
3590 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3591 Depth + 1))
3592 return true;
3593 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3594 Depth + 1))
3595 return true;
3596
3597 KnownUndef = UndefLHS & UndefRHS;
3598 KnownZero = ZeroLHS & ZeroRHS;
3599
3600 // If we know that the selected element is always zero, we don't need the
3601 // select value element.
3602 APInt DemandedSel = DemandedElts & ~KnownZero;
3603 if (DemandedSel != DemandedElts)
3604 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3605 Depth + 1))
3606 return true;
3607
3608 break;
3609 }
3610 case ISD::VECTOR_SHUFFLE: {
3611 SDValue LHS = Op.getOperand(0);
3612 SDValue RHS = Op.getOperand(1);
3613 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3614
3615 // Collect demanded elements from shuffle operands.
3616 APInt DemandedLHS(NumElts, 0);
3617 APInt DemandedRHS(NumElts, 0);
3618 for (unsigned i = 0; i != NumElts; ++i) {
3619 int M = ShuffleMask[i];
3620 if (M < 0 || !DemandedElts[i])
3621 continue;
3622 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3623 if (M < (int)NumElts)
3624 DemandedLHS.setBit(M);
3625 else
3626 DemandedRHS.setBit(M - NumElts);
3627 }
3628
3629 // If either side isn't demanded, replace it by UNDEF. We handle this
3630 // explicitly here to also simplify in case of multiple uses (on the
3631 // contrary to the SimplifyDemandedVectorElts calls below).
3632 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3633 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3634 if (FoldLHS || FoldRHS) {
3635 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3636 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3637 SDValue NewOp =
3638 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3639 return TLO.CombineTo(Op, NewOp);
3640 }
3641
3642 // See if we can simplify either shuffle operand.
3643 APInt UndefLHS, ZeroLHS;
3644 APInt UndefRHS, ZeroRHS;
3645 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3646 Depth + 1))
3647 return true;
3648 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3649 Depth + 1))
3650 return true;
3651
3652 // Simplify mask using undef elements from LHS/RHS.
3653 bool Updated = false;
3654 bool IdentityLHS = true, IdentityRHS = true;
3655 SmallVector<int, 32> NewMask(ShuffleMask);
3656 for (unsigned i = 0; i != NumElts; ++i) {
3657 int &M = NewMask[i];
3658 if (M < 0)
3659 continue;
3660 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3661 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3662 Updated = true;
3663 M = -1;
3664 }
3665 IdentityLHS &= (M < 0) || (M == (int)i);
3666 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3667 }
3668
3669 // Update legal shuffle masks based on demanded elements if it won't reduce
3670 // to Identity which can cause premature removal of the shuffle mask.
3671 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3672 SDValue LegalShuffle =
3673 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3674 if (LegalShuffle)
3675 return TLO.CombineTo(Op, LegalShuffle);
3676 }
3677
3678 // Propagate undef/zero elements from LHS/RHS.
3679 for (unsigned i = 0; i != NumElts; ++i) {
3680 int M = ShuffleMask[i];
3681 if (M < 0) {
3682 KnownUndef.setBit(i);
3683 } else if (M < (int)NumElts) {
3684 if (UndefLHS[M])
3685 KnownUndef.setBit(i);
3686 if (ZeroLHS[M])
3687 KnownZero.setBit(i);
3688 } else {
3689 if (UndefRHS[M - NumElts])
3690 KnownUndef.setBit(i);
3691 if (ZeroRHS[M - NumElts])
3692 KnownZero.setBit(i);
3693 }
3694 }
3695 break;
3696 }
3700 APInt SrcUndef, SrcZero;
3701 SDValue Src = Op.getOperand(0);
3702 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3703 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3704 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3705 Depth + 1))
3706 return true;
3707 KnownZero = SrcZero.zextOrTrunc(NumElts);
3708 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3709
3710 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3711 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3712 DemandedSrcElts == 1) {
3713 // aext - if we just need the bottom element then we can bitcast.
3714 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3715 }
3716
3717 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3718 // zext(undef) upper bits are guaranteed to be zero.
3719 if (DemandedElts.isSubsetOf(KnownUndef))
3720 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3721 KnownUndef.clearAllBits();
3722
3723 // zext - if we just need the bottom element then we can mask:
3724 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3725 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3726 Op->isOnlyUserOf(Src.getNode()) &&
3727 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3728 SDLoc DL(Op);
3729 EVT SrcVT = Src.getValueType();
3730 EVT SrcSVT = SrcVT.getScalarType();
3731
3732 // If we're after type legalization and SrcSVT is not legal, use the
3733 // promoted type for creating constants to avoid creating nodes with
3734 // illegal types.
3736 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3737
3738 SmallVector<SDValue> MaskElts;
3739 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3740 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3741 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3742 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3743 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3744 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3745 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3746 }
3747 }
3748 }
3749 break;
3750 }
3751
3752 // TODO: There are more binop opcodes that could be handled here - MIN,
3753 // MAX, saturated math, etc.
3754 case ISD::ADD: {
3755 SDValue Op0 = Op.getOperand(0);
3756 SDValue Op1 = Op.getOperand(1);
3757 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3758 APInt UndefLHS, ZeroLHS;
3759 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3760 Depth + 1, /*AssumeSingleUse*/ true))
3761 return true;
3762 }
3763 [[fallthrough]];
3764 }
3765 case ISD::AVGCEILS:
3766 case ISD::AVGCEILU:
3767 case ISD::AVGFLOORS:
3768 case ISD::AVGFLOORU:
3769 case ISD::OR:
3770 case ISD::XOR:
3771 case ISD::SUB:
3772 case ISD::FADD:
3773 case ISD::FSUB:
3774 case ISD::FMUL:
3775 case ISD::FDIV:
3776 case ISD::FREM: {
3777 SDValue Op0 = Op.getOperand(0);
3778 SDValue Op1 = Op.getOperand(1);
3779
3780 APInt UndefRHS, ZeroRHS;
3781 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3782 Depth + 1))
3783 return true;
3784 APInt UndefLHS, ZeroLHS;
3785 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3786 Depth + 1))
3787 return true;
3788
3789 KnownZero = ZeroLHS & ZeroRHS;
3790 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3791
3792 // Attempt to avoid multi-use ops if we don't need anything from them.
3793 // TODO - use KnownUndef to relax the demandedelts?
3794 if (!DemandedElts.isAllOnes())
3795 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3796 return true;
3797 break;
3798 }
3799 case ISD::SHL:
3800 case ISD::SRL:
3801 case ISD::SRA:
3802 case ISD::ROTL:
3803 case ISD::ROTR: {
3804 SDValue Op0 = Op.getOperand(0);
3805 SDValue Op1 = Op.getOperand(1);
3806
3807 APInt UndefRHS, ZeroRHS;
3808 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3809 Depth + 1))
3810 return true;
3811 APInt UndefLHS, ZeroLHS;
3812 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3813 Depth + 1))
3814 return true;
3815
3816 KnownZero = ZeroLHS;
3817 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3818
3819 // Attempt to avoid multi-use ops if we don't need anything from them.
3820 // TODO - use KnownUndef to relax the demandedelts?
3821 if (!DemandedElts.isAllOnes())
3822 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3823 return true;
3824 break;
3825 }
3826 case ISD::MUL:
3827 case ISD::MULHU:
3828 case ISD::MULHS:
3829 case ISD::AND: {
3830 SDValue Op0 = Op.getOperand(0);
3831 SDValue Op1 = Op.getOperand(1);
3832
3833 APInt SrcUndef, SrcZero;
3834 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3835 Depth + 1))
3836 return true;
3837 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3838 // to demand it in Op0 - it's guaranteed to be zero. There is however a
3839 // restriction, as we must not make any of the originally demanded elements
3840 // more poisonous. We could reduce the number of elements demanded, but then
3841 // we also need to inform SimplifyDemandedVectorElts that some elements must
3842 // not be made more poisonous.
3843 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3844 TLO, Depth + 1))
3845 return true;
3846
3847 KnownUndef &= DemandedElts;
3848 KnownZero &= DemandedElts;
3849
3850 // If every element pair has a zero/undef/poison then just fold to zero.
3851 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3852 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3853 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3854 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3855
3856 // If either side has a zero element, then the result element is zero, even
3857 // if the other is an UNDEF.
3858 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3859 // and then handle 'and' nodes with the rest of the binop opcodes.
3860 KnownZero |= SrcZero;
3861 KnownUndef &= SrcUndef;
3862 KnownUndef &= ~KnownZero;
3863
3864 // Attempt to avoid multi-use ops if we don't need anything from them.
3865 if (!DemandedElts.isAllOnes())
3866 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3867 return true;
3868 break;
3869 }
3870 case ISD::TRUNCATE:
3871 case ISD::SIGN_EXTEND:
3872 case ISD::ZERO_EXTEND:
3873 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3874 KnownZero, TLO, Depth + 1))
3875 return true;
3876
3877 if (!DemandedElts.isAllOnes())
3879 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3880 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3881
3882 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3883 // zext(undef) upper bits are guaranteed to be zero.
3884 if (DemandedElts.isSubsetOf(KnownUndef))
3885 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3886 KnownUndef.clearAllBits();
3887 }
3888 break;
3889 case ISD::SINT_TO_FP:
3890 case ISD::UINT_TO_FP:
3891 case ISD::FP_TO_SINT:
3892 case ISD::FP_TO_UINT:
3893 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3894 KnownZero, TLO, Depth + 1))
3895 return true;
3896 // Don't fall through to generic undef -> undef handling.
3897 return false;
3898 default: {
3899 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3900 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3901 KnownZero, TLO, Depth))
3902 return true;
3903 } else {
3904 KnownBits Known;
3905 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3906 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3907 TLO, Depth, AssumeSingleUse))
3908 return true;
3909 }
3910 break;
3911 }
3912 }
3913 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3914
3915 // Constant fold all undef cases.
3916 // TODO: Handle zero cases as well.
3917 if (DemandedElts.isSubsetOf(KnownUndef))
3918 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3919
3920 return false;
3921}
3922
3923/// Determine which of the bits specified in Mask are known to be either zero or
3924/// one and return them in the Known.
3926 KnownBits &Known,
3927 const APInt &DemandedElts,
3928 const SelectionDAG &DAG,
3929 unsigned Depth) const {
3930 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3931 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3932 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3933 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3934 "Should use MaskedValueIsZero if you don't know whether Op"
3935 " is a target node!");
3936 Known.resetAll();
3937}
3938
3941 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3942 unsigned Depth) const {
3943 Known.resetAll();
3944}
3945
3948 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3949 unsigned Depth) const {
3950 Known.resetAll();
3951}
3952
3954 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3955 // The low bits are known zero if the pointer is aligned.
3956 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3957}
3958
3964
3965/// This method can be implemented by targets that want to expose additional
3966/// information about sign bits to the DAG Combiner.
3968 const APInt &,
3969 const SelectionDAG &,
3970 unsigned Depth) const {
3971 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3972 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3973 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3974 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3975 "Should use ComputeNumSignBits if you don't know whether Op"
3976 " is a target node!");
3977 return 1;
3978}
3979
3981 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3982 const MachineRegisterInfo &MRI, unsigned Depth) const {
3983 return 1;
3984}
3985
3987 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3988 TargetLoweringOpt &TLO, unsigned Depth) const {
3989 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3990 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3991 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3993 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3994 " is a target node!");
3995 return false;
3996}
3997
3999 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4000 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
4001 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4002 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4003 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4005 "Should use SimplifyDemandedBits if you don't know whether Op"
4006 " is a target node!");
4007 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4008 return false;
4009}
4010
4012 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4013 SelectionDAG &DAG, unsigned Depth) const {
4014 assert(
4015 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4016 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4017 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4018 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4019 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4020 " is a target node!");
4021 return SDValue();
4022}
4023
4024SDValue
4027 SelectionDAG &DAG) const {
4028 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4029 if (!LegalMask) {
4030 std::swap(N0, N1);
4032 LegalMask = isShuffleMaskLegal(Mask, VT);
4033 }
4034
4035 if (!LegalMask)
4036 return SDValue();
4037
4038 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4039}
4040
4042 return nullptr;
4043}
4044
4046 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4047 UndefPoisonKind Kind, unsigned Depth) const {
4048 assert(
4049 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4050 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4051 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4052 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4053 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4054 " is a target node!");
4055
4056 // If Op can't create undef/poison and none of its operands are undef/poison
4057 // then Op is never undef/poison.
4058 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4059 /*ConsiderFlags*/ true, Depth) &&
4060 all_of(Op->ops(), [&](SDValue V) {
4061 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4062 });
4063}
4064
4066 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4067 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4068 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4069 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4070 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4071 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4072 "Should use canCreateUndefOrPoison if you don't know whether Op"
4073 " is a target node!");
4074 // Be conservative and return true.
4075 return true;
4076}
4077
4079 KnownFPClass &Known,
4080 const APInt &DemandedElts,
4081 const SelectionDAG &DAG,
4082 unsigned Depth) const {
4083 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4084 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4085 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4086 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4087 "Should use computeKnownFPClass if you don't know whether Op"
4088 " is a target node!");
4089}
4090
4092 const APInt &DemandedElts,
4093 const SelectionDAG &DAG,
4094 bool SNaN,
4095 unsigned Depth) const {
4096 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4097 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4098 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4099 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4100 "Should use isKnownNeverNaN if you don't know whether Op"
4101 " is a target node!");
4102 return false;
4103}
4104
4106 const APInt &DemandedElts,
4107 APInt &UndefElts,
4108 const SelectionDAG &DAG,
4109 unsigned Depth) const {
4110 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4111 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4112 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4113 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4114 "Should use isSplatValue if you don't know whether Op"
4115 " is a target node!");
4116 return false;
4117}
4118
4119// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4120// work with truncating build vectors and vectors with elements of less than
4121// 8 bits.
4123 if (!N)
4124 return false;
4125
4126 unsigned EltWidth;
4127 APInt CVal;
4128 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4129 /*AllowTruncation=*/true)) {
4130 CVal = CN->getAPIntValue();
4131 EltWidth = N.getValueType().getScalarSizeInBits();
4132 } else
4133 return false;
4134
4135 // If this is a truncating splat, truncate the splat value.
4136 // Otherwise, we may fail to match the expected values below.
4137 if (EltWidth < CVal.getBitWidth())
4138 CVal = CVal.trunc(EltWidth);
4139
4140 switch (getBooleanContents(N.getValueType())) {
4142 return CVal[0];
4144 return CVal.isOne();
4146 return CVal.isAllOnes();
4147 }
4148
4149 llvm_unreachable("Invalid boolean contents");
4150}
4151
4153 if (!N)
4154 return false;
4155
4157 if (!CN) {
4159 if (!BV)
4160 return false;
4161
4162 // Only interested in constant splats, we don't care about undef
4163 // elements in identifying boolean constants and getConstantSplatNode
4164 // returns NULL if all ops are undef;
4165 CN = BV->getConstantSplatNode();
4166 if (!CN)
4167 return false;
4168 }
4169
4170 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4171 return !CN->getAPIntValue()[0];
4172
4173 return CN->isZero();
4174}
4175
4177 bool SExt) const {
4178 if (VT == MVT::i1)
4179 return N->isOne();
4180
4182 switch (Cnt) {
4184 // An extended value of 1 is always true, unless its original type is i1,
4185 // in which case it will be sign extended to -1.
4186 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4189 return N->isAllOnes() && SExt;
4190 }
4191 llvm_unreachable("Unexpected enumeration.");
4192}
4193
4194/// This helper function of SimplifySetCC tries to optimize the comparison when
4195/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer SETEQ/SETNE comparisons are handled; anything else yields an
/// empty SDValue. The rewrites are attempted in this order:
///   1. (X & Y) != 0 --> bool-extended/truncated (X & Y) when every bit above
///      the LSB is known to be zero.
///   2. (X & Pow2C) ==/!= 0 --> a signbit test of X truncated to a legal
///      narrow type for which the truncate is free.
///   3. (X & Y) ==/!= Y --> a comparison against zero, either of the original
///      'and' or of (~X & Y) when the target has an and-not compare.
///
/// \param VT   Result type given to every setcc built here.
/// \param N0   First comparison operand.
/// \param N1   Second comparison operand.
/// \param Cond Comparison predicate.
/// \param DL   Debug location used for the setcc nodes that are created.
/// \param DCI  Combiner state; supplies the SelectionDAG.
/// \returns the replacement value, or an empty SDValue if no fold applied.
4196SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4197 ISD::CondCode Cond, const SDLoc &DL,
4198 DAGCombinerInfo &DCI) const {
 // Canonicalize: if only N1 is an 'and', move it into N0.
4199 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4200 std::swap(N0, N1);
4201
4202 SelectionDAG &DAG = DCI.DAG;
4203 EVT OpVT = N0.getValueType();
 // Nothing to do unless this is an integer eq/ne compare of an 'and'.
4204 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4205 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4206 return SDValue();
4207
4208 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4209 // iff everything but LSB is known zero:
 // NOTE(review): the rest of this condition (listing lines 4211-4212) is not
 // visible in this listing - confirm against the upstream source.
4210 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4213 unsigned NumEltBits = OpVT.getScalarSizeInBits();
 // Mask covering every bit except bit 0.
4214 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4215 if (DAG.MaskedValueIsZero(N0, UpperBits))
4216 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4217 }
4218
4219 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4220 // test in a narrow type that we can truncate to with no cost. Examples:
4221 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4222 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4223 // TODO: This conservatively checks for type legality on the source and
4224 // destination types. That may inhibit optimizations, but it also
4225 // allows setcc->shift transforms that may be more beneficial.
4226 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4227 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4228 isTypeLegal(OpVT) && N0.hasOneUse()) {
 // The narrow type is just wide enough for the mask bit to be its top bit.
4229 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4230 AndC->getAPIntValue().getActiveBits());
4231 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4232 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4233 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
 // NOTE(review): the predicate argument of this call (listing line 4235)
 // is not visible in this listing.
4234 return DAG.getSetCC(DL, VT, Trunc, Zero,
4236 }
4237 }
4238
4239 // Match these patterns in any of their permutations:
4240 // (X & Y) == Y
4241 // (X & Y) != Y
 // Y is whichever 'and' operand equals N1; X is the other operand.
4242 SDValue X, Y;
4243 if (N0.getOperand(0) == N1) {
4244 X = N0.getOperand(1);
4245 Y = N0.getOperand(0);
4246 } else if (N0.getOperand(1) == N1) {
4247 X = N0.getOperand(0);
4248 Y = N0.getOperand(1);
4249 } else {
4250 return SDValue();
4251 }
4252
4253 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4254 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4255 // it's liable to create an infinite loop.
4256 SDValue Zero = DAG.getConstant(0, DL, OpVT);
 // NOTE(review): listing lines 4258, 4264 and 4266 of this branch are not
 // visible in this listing; the comments below describe only what is shown.
4257 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4259 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4260 // Note that where Y is variable and is known to have at most one bit set
4261 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4262 // equivalent when Y == 0.
4263 assert(OpVT.isInteger());
4265 if (DCI.isBeforeLegalizeOps() ||
4267 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4268 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4269 // If the target supports an 'and-not' or 'and-complement' logic operation,
4270 // try to use that to make a comparison operation more efficient.
4271 // But don't do this transform if the mask is a single bit because there are
4272 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4273 // 'rlwinm' on PPC).
4274
4275 // Bail out if the compare operand that we want to turn into a zero is
4276 // already a zero (otherwise, infinite loop).
4277 if (isNullConstant(Y))
4278 return SDValue();
4279
4280 // Transform this into: ~X & Y == 0.
4281 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4282 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4283 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4284 }
4285
 // No pattern matched.
4286 return SDValue();
4287}
4288
4289/// This helper function of SimplifySetCC tries to optimize the comparison when
4290/// either operand of the SetCC node is a bitwise-or instruction.
4291/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4292SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4293 ISD::CondCode Cond, const SDLoc &DL,
4294 DAGCombinerInfo &DCI) const {
4295 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4296 std::swap(N0, N1);
4297
4298 SelectionDAG &DAG = DCI.DAG;
4299 EVT OpVT = N0.getValueType();
4300 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4301 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4302 return SDValue();
4303
4304 // (X | Y) == Y
4305 // (X | Y) != Y
4306 SDValue X;
4307 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4308 // If the target supports an 'and-not' or 'and-complement' logic operation,
4309 // try to use that to make a comparison operation more efficient.
4310
4311 // Bail out if the compare operand that we want to turn into a zero is
4312 // already a zero (otherwise, infinite loop).
4313 if (isNullConstant(N1))
4314 return SDValue();
4315
4316 // Transform this into: X & ~Y ==/!= 0.
4317 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4318 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4319 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4320 }
4321
4322 return SDValue();
4323}
4324
4325/// There are multiple IR patterns that could be checking whether certain
4326/// truncation of a signed number would be lossy or not. The pattern which is
4327/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4328/// We are looking for the following pattern: (KeptBits is a constant)
4329/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4330/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4331/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4332/// We will unfold it into the natural trunc+sext pattern:
4333/// ((%x << C) a>> C) dstcond %x
4334/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
///
/// \param SCCVT Result type of the setcc that is built.
/// \param N0    Left operand; must be an ADD with a constant second operand.
/// \param N1    Right operand; must be a constant.
/// \param Cond  Source predicate; only SETULT/SETULE/SETUGT/SETUGE are
///              accepted.
/// \param DCI   Combiner state; supplies the SelectionDAG.
/// \param DL    Debug location for the nodes that are created.
/// \returns a setcc comparing the sign-extended-in-register value against %x
/// with an eq/ne predicate, or an empty SDValue if the pattern does not match
/// or the target declines the transform.
4335SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4336 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4337 const SDLoc &DL) const {
4338 // We must be comparing with a constant.
4339 ConstantSDNode *C1;
4340 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4341 return SDValue();
4342
4343 // N0 should be: add %x, (1 << (KeptBits-1))
4344 if (N0->getOpcode() != ISD::ADD)
4345 return SDValue();
4346
4347 // And we must be 'add'ing a constant.
4348 ConstantSDNode *C01;
4349 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4350 return SDValue();
4351
4352 SDValue X = N0->getOperand(0);
4353 EVT XVT = X.getValueType();
4354
4355 // Validate constants ...
4356
 // Work on a copy of the compare constant: it may be adjusted below.
4357 APInt I1 = C1->getAPIntValue();
4358
 // Map the unsigned predicate to the eq/ne predicate of the unfolded form.
 // The non-strict "less" and the strict "greater" forms are first rewritten
 // as ult/uge by bumping the constant by one.
4359 ISD::CondCode NewCond;
4360 if (Cond == ISD::CondCode::SETULT) {
4361 NewCond = ISD::CondCode::SETEQ;
4362 } else if (Cond == ISD::CondCode::SETULE) {
4363 NewCond = ISD::CondCode::SETEQ;
4364 // But need to 'canonicalize' the constant.
4365 I1 += 1;
4366 } else if (Cond == ISD::CondCode::SETUGT) {
4367 NewCond = ISD::CondCode::SETNE;
4368 // But need to 'canonicalize' the constant.
4369 I1 += 1;
4370 } else if (Cond == ISD::CondCode::SETUGE) {
4371 NewCond = ISD::CondCode::SETNE;
4372 } else
4373 return SDValue();
4374
4375 APInt I01 = C01->getAPIntValue();
4376
 // The lambda captures I1/I01 by reference, so it observes the negation
 // performed below when it is called a second time.
4377 auto checkConstants = [&I1, &I01]() -> bool {
4378 // Both of them must be power-of-two, and the constant from setcc is bigger.
4379 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4380 };
4381
4382 if (checkConstants()) {
4383 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4384 } else {
4385 // What if we invert constants? (and the target predicate)
4386 I1.negate();
4387 I01.negate();
4388 assert(XVT.isInteger());
4389 NewCond = getSetCCInverse(NewCond, XVT);
4390 if (!checkConstants())
4391 return SDValue();
4392 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4393 }
4394
4395 // They are power-of-two, so which bit is set?
4396 const unsigned KeptBits = I1.logBase2();
4397 const unsigned KeptBitsMinusOne = I01.logBase2();
4398
4399 // Magic!
 // The added constant must be exactly half of the compared constant, i.e.
 // the two set bits must be adjacent.
4400 if (KeptBits != (KeptBitsMinusOne + 1))
4401 return SDValue();
4402 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4403
4404 // We don't want to do this in every single case.
4405 SelectionDAG &DAG = DCI.DAG;
4406 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4407 return SDValue();
4408
4409 // Unfold into: sext_inreg(%x) cond %x
4410 // Where 'cond' will be either 'eq' or 'ne'.
 // NOTE(review): the leading arguments of this getNode call (listing line
 // 4412) are not visible in this listing.
4411 SDValue SExtInReg = DAG.getNode(
4413 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4414 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4415}
4416
4417// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
//
// Moves the variable logical shift from the constant mask onto the other 'and'
// operand, using the opposite shift direction, so that the 'and' is performed
// with the unshifted constant C.
//
// SCCVT is the result type of the rebuilt setcc, N0 the left operand of the
// comparison (expected to be a one-use 'and'), N1C the right operand, Cond
// the predicate (SETEQ or SETNE), DCI supplies the SelectionDAG and DL is the
// debug location for the new nodes. Returns the new setcc, or an empty
// SDValue when the pattern is not matched.
4418SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4419 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4420 DAGCombinerInfo &DCI, const SDLoc &DL) const {
 // NOTE(review): the opening of this assert (listing line 4421) is not
 // visible in this listing; only its message is shown.
4422 "Should be a comparison with 0.");
4423 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4424 "Valid only for [in]equality comparisons.");
4425
 // Filled in by Match (NewShiftOpcode, C, Y) and by the caller code below (X).
4426 unsigned NewShiftOpcode;
4427 SDValue X, C, Y;
4428
4429 SelectionDAG &DAG = DCI.DAG;
4430
4431 // Look for '(C l>>/<< Y)'.
 // Match reads X through its by-reference capture, so X must be assigned
 // before each call.
4432 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4433 // The shift should be one-use.
4434 if (!V.hasOneUse())
4435 return false;
4436 unsigned OldShiftOpcode = V.getOpcode();
 // The replacement shift goes in the opposite direction.
4437 switch (OldShiftOpcode) {
4438 case ISD::SHL:
4439 NewShiftOpcode = ISD::SRL;
4440 break;
4441 case ISD::SRL:
4442 NewShiftOpcode = ISD::SHL;
4443 break;
4444 default:
4445 return false; // must be a logical shift.
4446 }
4447 // We should be shifting a constant.
4448 // FIXME: best to use isConstantOrConstantVector().
4449 C = V.getOperand(0);
4450 ConstantSDNode *CC =
4451 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4452 if (!CC)
4453 return false;
4454 Y = V.getOperand(1);
4455
 // XC is null when X is not a constant or constant splat.
4456 ConstantSDNode *XC =
4457 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
 // NOTE(review): the callee of this final call (listing line 4458) is not
 // visible in this listing; its result is what Match returns.
4459 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4460 };
4461
4462 // LHS of comparison should be an one-use 'and'.
4463 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4464 return SDValue();
4465
4466 X = N0.getOperand(0);
4467 SDValue Mask = N0.getOperand(1);
4468
4469 // 'and' is commutative!
 // Try the shift as the second operand first, then as the first.
4470 if (!Match(Mask)) {
4471 std::swap(X, Mask);
4472 if (!Match(Mask))
4473 return SDValue();
4474 }
4475
4476 EVT VT = X.getValueType();
4477
4478 // Produce:
4479 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4480 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4481 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4482 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4483 return T2;
4484}
4485
4486/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4487/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4488/// handle the commuted versions of these patterns.
4489SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4490 ISD::CondCode Cond, const SDLoc &DL,
4491 DAGCombinerInfo &DCI) const {
4492 unsigned BOpcode = N0.getOpcode();
4493 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4494 "Unexpected binop");
4495 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4496
4497 // (X + Y) == X --> Y == 0
4498 // (X - Y) == X --> Y == 0
4499 // (X ^ Y) == X --> Y == 0
4500 SelectionDAG &DAG = DCI.DAG;
4501 EVT OpVT = N0.getValueType();
4502 SDValue X = N0.getOperand(0);
4503 SDValue Y = N0.getOperand(1);
4504 if (X == N1)
4505 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4506
4507 if (Y != N1)
4508 return SDValue();
4509
4510 // (X + Y) == Y --> X == 0
4511 // (X ^ Y) == Y --> X == 0
4512 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4513 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4514
4515 // The shift would not be valid if the operands are boolean (i1).
4516 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4517 return SDValue();
4518
4519 // (X - Y) == Y --> X == Y << 1
4520 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4521 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4522 if (!DCI.isCalledByLegalizer())
4523 DCI.AddToWorklist(YShl1.getNode());
4524 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4525}
4526
4528 SDValue N0, const APInt &C1,
4529 ISD::CondCode Cond, const SDLoc &dl,
4530 SelectionDAG &DAG) {
4531 // Look through truncs that don't change the value of a ctpop.
4532 // FIXME: Add vector support? Need to be careful with setcc result type below.
4533 SDValue CTPOP = N0;
4534 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4536 CTPOP = N0.getOperand(0);
4537
4538 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4539 return SDValue();
4540
4541 EVT CTVT = CTPOP.getValueType();
4542 SDValue CTOp = CTPOP.getOperand(0);
4543
4544 // Expand a power-of-2-or-zero comparison based on ctpop:
4545 // (ctpop x) u< 2 -> (x & x-1) == 0
4546 // (ctpop x) u> 1 -> (x & x-1) != 0
4547 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4548 // Keep the CTPOP if it is a cheap vector op.
4549 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4550 return SDValue();
4551
4552 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4553 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4554 return SDValue();
4555 if (C1 == 0 && (Cond == ISD::SETULT))
4556 return SDValue(); // This is handled elsewhere.
4557
4558 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4559
4560 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4561 SDValue Result = CTOp;
4562 for (unsigned i = 0; i < Passes; i++) {
4563 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4564 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4565 }
4567 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4568 }
4569
4570 // Expand a power-of-2 comparison based on ctpop
4571 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4572 // Keep the CTPOP if it is cheap.
4573 if (TLI.isCtpopFast(CTVT))
4574 return SDValue();
4575
4576 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4577 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4578 assert(CTVT.isInteger());
4579 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4580
4581 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4582 // check before emitting a potentially unnecessary op.
4583 if (DAG.isKnownNeverZero(CTOp)) {
4584 // (ctpop x) == 1 --> (x & x-1) == 0
4585 // (ctpop x) != 1 --> (x & x-1) != 0
4586 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4587 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4588 return RHS;
4589 }
4590
4591 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4592 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4593 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4595 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4596 }
4597
4598 return SDValue();
4599}
4600
4602 ISD::CondCode Cond, const SDLoc &dl,
4603 SelectionDAG &DAG) {
4604 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4605 return SDValue();
4606
4607 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4608 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4609 return SDValue();
4610
4611 auto getRotateSource = [](SDValue X) {
4612 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4613 return X.getOperand(0);
4614 return SDValue();
4615 };
4616
4617 // Peek through a rotated value compared against 0 or -1:
4618 // (rot X, Y) == 0/-1 --> X == 0/-1
4619 // (rot X, Y) != 0/-1 --> X != 0/-1
4620 if (SDValue R = getRotateSource(N0))
4621 return DAG.getSetCC(dl, VT, R, N1, Cond);
4622
4623 // Peek through an 'or' of a rotated value compared against 0:
4624 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4625 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4626 //
4627 // TODO: Add the 'and' with -1 sibling.
4628 // TODO: Recurse through a series of 'or' ops to find the rotate.
4629 EVT OpVT = N0.getValueType();
4630 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4631 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4632 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4633 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4634 }
4635 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4636 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4637 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4638 }
4639 }
4640
4641 return SDValue();
4642}
4643
4645 ISD::CondCode Cond, const SDLoc &dl,
4646 SelectionDAG &DAG) {
4647 // If we are testing for all-bits-clear, we might be able to do that with
4648 // less shifting since bit-order does not matter.
4649 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4650 return SDValue();
4651
4652 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4653 if (!C1 || !C1->isZero())
4654 return SDValue();
4655
4656 if (!N0.hasOneUse() ||
4657 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4658 return SDValue();
4659
4660 unsigned BitWidth = N0.getScalarValueSizeInBits();
4661 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4662 if (!ShAmtC)
4663 return SDValue();
4664
4665 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4666 if (ShAmt == 0)
4667 return SDValue();
4668
4669 // Canonicalize fshr as fshl to reduce pattern-matching.
4670 if (N0.getOpcode() == ISD::FSHR)
4671 ShAmt = BitWidth - ShAmt;
4672
4673 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4674 SDValue X, Y;
4675 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4676 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4677 return false;
4678 if (Or.getOperand(0) == Other) {
4679 X = Or.getOperand(0);
4680 Y = Or.getOperand(1);
4681 return true;
4682 }
4683 if (Or.getOperand(1) == Other) {
4684 X = Or.getOperand(1);
4685 Y = Or.getOperand(0);
4686 return true;
4687 }
4688 return false;
4689 };
4690
4691 EVT OpVT = N0.getValueType();
4692 EVT ShAmtVT = N0.getOperand(2).getValueType();
4693 SDValue F0 = N0.getOperand(0);
4694 SDValue F1 = N0.getOperand(1);
4695 if (matchOr(F0, F1)) {
4696 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4697 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4698 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4699 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4700 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4701 }
4702 if (matchOr(F1, F0)) {
4703 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4704 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4705 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4706 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4707 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4708 }
4709
4710 return SDValue();
4711}
4712
4713/// Try to simplify a setcc built with the specified operands and cc. If it is
4714/// unable to simplify it, return a null SDValue.
4716 ISD::CondCode Cond, bool foldBooleans,
4717 DAGCombinerInfo &DCI,
4718 const SDLoc &dl) const {
4719 SelectionDAG &DAG = DCI.DAG;
4720 const DataLayout &Layout = DAG.getDataLayout();
4721 EVT OpVT = N0.getValueType();
4722 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4723
4724 // Constant fold or commute setcc.
4725 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4726 return Fold;
4727
4728 bool N0ConstOrSplat =
4729 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4730 bool N1ConstOrSplat =
4731 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4732
4733 // Canonicalize toward having the constant on the RHS.
4734 // TODO: Handle non-splat vector constants. All undef causes trouble.
4735 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4736 // infinite loop here when we encounter one.
4738 if (N0ConstOrSplat && !N1ConstOrSplat &&
4739 (DCI.isBeforeLegalizeOps() ||
4740 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4741 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4742
4743 // If we have a subtract with the same 2 non-constant operands as this setcc
4744 // -- but in reverse order -- then try to commute the operands of this setcc
4745 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4746 // instruction on some targets.
4747 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4748 (DCI.isBeforeLegalizeOps() ||
4749 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4750 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4751 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4752 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4753
4754 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4755 return V;
4756
4757 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4758 return V;
4759
4760 if (auto *N1C = isConstOrConstSplat(N1)) {
4761 const APInt &C1 = N1C->getAPIntValue();
4762
4763 // Optimize some CTPOP cases.
4764 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4765 return V;
4766
4767 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4768 // X * Y == 0 --> (X == 0) || (Y == 0)
4769 // X * Y != 0 --> (X != 0) && (Y != 0)
4770 // TODO: This bails out if minsize is set, but if the target doesn't have a
4771 // single instruction multiply for this type, it would likely be
4772 // smaller to decompose.
4773 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4774 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4775 (N0->getFlags().hasNoUnsignedWrap() ||
4776 N0->getFlags().hasNoSignedWrap()) &&
4777 !Attr.hasFnAttr(Attribute::MinSize)) {
4778 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4779 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4780 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4781 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4782 }
4783
4784 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4785 // equality comparison, then we're just comparing whether X itself is
4786 // zero.
4787 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4788 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4790 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4791 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4792 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4793 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4794 // (srl (ctlz x), 5) == 0 -> X != 0
4795 // (srl (ctlz x), 5) != 1 -> X != 0
4796 Cond = ISD::SETNE;
4797 } else {
4798 // (srl (ctlz x), 5) != 0 -> X == 0
4799 // (srl (ctlz x), 5) == 1 -> X == 0
4800 Cond = ISD::SETEQ;
4801 }
4802 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4803 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4804 Cond);
4805 }
4806 }
4807 }
4808 }
4809
4810 // setcc X, 0, setlt --> X (when X is all sign bits)
4811 // setcc X, 0, setne --> X (when X is all sign bits)
4812 //
4813 // When we know that X has 0 or -1 in each element (or scalar), this
4814 // comparison will produce X. This is only true when boolean contents are
4815 // represented via 0s and -1s.
4816 if (VT == OpVT &&
4817 // Check that the result of setcc is 0 and -1.
4819 // Match only for checks X < 0 and X != 0
4820 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4821 // The identity holds iff we know all sign bits for all lanes.
4823 return N0;
4824
4825 // FIXME: Support vectors.
4826 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4827 const APInt &C1 = N1C->getAPIntValue();
4828
4829 // (zext x) == C --> x == (trunc C)
4830 // (sext x) == C --> x == (trunc C)
4831 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4832 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4833 unsigned MinBits = N0.getValueSizeInBits();
4834 SDValue PreExt;
4835 bool Signed = false;
4836 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4837 // ZExt
4838 MinBits = N0->getOperand(0).getValueSizeInBits();
4839 PreExt = N0->getOperand(0);
4840 } else if (N0->getOpcode() == ISD::AND) {
4841 // DAGCombine turns costly ZExts into ANDs
4842 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4843 if ((C->getAPIntValue()+1).isPowerOf2()) {
4844 MinBits = C->getAPIntValue().countr_one();
4845 PreExt = N0->getOperand(0);
4846 }
4847 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4848 // SExt
4849 MinBits = N0->getOperand(0).getValueSizeInBits();
4850 PreExt = N0->getOperand(0);
4851 Signed = true;
4852 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4853 // ZEXTLOAD / SEXTLOAD
4854 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4855 MinBits = LN0->getMemoryVT().getSizeInBits();
4856 PreExt = N0;
4857 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4858 Signed = true;
4859 MinBits = LN0->getMemoryVT().getSizeInBits();
4860 PreExt = N0;
4861 }
4862 }
4863
4864 // Figure out how many bits we need to preserve this constant.
4865 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4866
4867 // Make sure we're not losing bits from the constant.
4868 if (MinBits > 0 &&
4869 MinBits < C1.getBitWidth() &&
4870 MinBits >= ReqdBits) {
4871 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4872 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4873 // Will get folded away.
4874 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4875 if (MinBits == 1 && C1 == 1)
4876 // Invert the condition.
4877 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4879 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4880 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4881 }
4882
4883 // If truncating the setcc operands is not desirable, we can still
4884 // simplify the expression in some cases:
4885 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4886 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4887 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4888 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4889 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4890 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4891 SDValue TopSetCC = N0->getOperand(0);
4892 unsigned N0Opc = N0->getOpcode();
4893 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4894 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4895 TopSetCC.getOpcode() == ISD::SETCC &&
4896 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4897 (isConstFalseVal(N1) ||
4898 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4899
4900 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4901 (!N1C->isZero() && Cond == ISD::SETNE);
4902
4903 if (!Inverse)
4904 return TopSetCC;
4905
4907 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4908 TopSetCC.getOperand(0).getValueType());
4909 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4910 TopSetCC.getOperand(1),
4911 InvCond);
4912 }
4913 }
4914 }
4915
4916 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4917 // equality or unsigned, and all 1 bits of the const are in the same
4918 // partial word, see if we can shorten the load.
4919 if (DCI.isBeforeLegalize() &&
4921 N0.getOpcode() == ISD::AND && C1 == 0 &&
4922 N0.getNode()->hasOneUse() &&
4923 isa<LoadSDNode>(N0.getOperand(0)) &&
4924 N0.getOperand(0).getNode()->hasOneUse() &&
4926 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4927 APInt bestMask;
4928 unsigned bestWidth = 0, bestOffset = 0;
4929 if (Lod->isSimple() && Lod->isUnindexed() &&
4930 (Lod->getMemoryVT().isByteSized() ||
4931 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4932 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4933 unsigned origWidth = N0.getValueSizeInBits();
4934 unsigned maskWidth = origWidth;
4935 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4936 // 8 bits, but have to be careful...
4937 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4938 origWidth = Lod->getMemoryVT().getSizeInBits();
4939 const APInt &Mask = N0.getConstantOperandAPInt(1);
4940 // Only consider power-of-2 widths (and at least one byte) as candidates
4941 // for the narrowed load.
4942 for (unsigned width = 8; width < origWidth; width *= 2) {
4943 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4944 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4945 // Avoid accessing any padding here for now (we could use memWidth
4946 // instead of origWidth here otherwise).
4947 unsigned maxOffset = origWidth - width;
4948 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4949 if (Mask.isSubsetOf(newMask)) {
4950 unsigned ptrOffset =
4951 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4952 unsigned IsFast = 0;
4953 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4954 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4956 ptrOffset / 8) &&
4958 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4959 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4960 IsFast) {
4961 bestOffset = ptrOffset / 8;
4962 bestMask = Mask.lshr(offset);
4963 bestWidth = width;
4964 break;
4965 }
4966 }
4967 newMask <<= 8;
4968 }
4969 if (bestWidth)
4970 break;
4971 }
4972 }
4973 if (bestWidth) {
4974 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4975 SDValue Ptr = Lod->getBasePtr();
4976 if (bestOffset != 0)
4977 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4978 SDValue NewLoad =
4979 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4980 Lod->getPointerInfo().getWithOffset(bestOffset),
4981 Lod->getBaseAlign());
4982 SDValue And =
4983 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4984 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4985 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4986 }
4987 }
4988
4989 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4990 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4991 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4992
4993 // If the comparison constant has bits in the upper part, the
4994 // zero-extended value could never match.
4996 C1.getBitWidth() - InSize))) {
4997 switch (Cond) {
4998 case ISD::SETUGT:
4999 case ISD::SETUGE:
5000 case ISD::SETEQ:
5001 return DAG.getConstant(0, dl, VT);
5002 case ISD::SETULT:
5003 case ISD::SETULE:
5004 case ISD::SETNE:
5005 return DAG.getConstant(1, dl, VT);
5006 case ISD::SETGT:
5007 case ISD::SETGE:
5008 // True if the sign bit of C1 is set.
5009 return DAG.getConstant(C1.isNegative(), dl, VT);
5010 case ISD::SETLT:
5011 case ISD::SETLE:
5012 // True if the sign bit of C1 isn't set.
5013 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5014 default:
5015 break;
5016 }
5017 }
5018
5019 // Otherwise, we can perform the comparison with the low bits.
5020 switch (Cond) {
5021 case ISD::SETEQ:
5022 case ISD::SETNE:
5023 case ISD::SETUGT:
5024 case ISD::SETUGE:
5025 case ISD::SETULT:
5026 case ISD::SETULE: {
5027 EVT newVT = N0.getOperand(0).getValueType();
5028 // FIXME: Should use isNarrowingProfitable.
5029 if (DCI.isBeforeLegalizeOps() ||
5030 (isOperationLegal(ISD::SETCC, newVT) &&
5031 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5033 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5034 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5035
5036 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5037 NewConst, Cond);
5038 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5039 }
5040 break;
5041 }
5042 default:
5043 break; // todo, be more careful with signed comparisons
5044 }
5045 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5046 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5048 OpVT)) {
5049 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5050 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5051 EVT ExtDstTy = N0.getValueType();
5052 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5053
5054 // If the constant doesn't fit into the number of bits for the source of
5055 // the sign extension, it is impossible for both sides to be equal.
5056 if (C1.getSignificantBits() > ExtSrcTyBits)
5057 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5058
5059 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5060 ExtDstTy != ExtSrcTy && "Unexpected types!");
5061 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5062 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5063 DAG.getConstant(Imm, dl, ExtDstTy));
5064 if (!DCI.isCalledByLegalizer())
5065 DCI.AddToWorklist(ZextOp.getNode());
5066 // Otherwise, make this a use of a zext.
5067 return DAG.getSetCC(dl, VT, ZextOp,
5068 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5069 } else if ((N1C->isZero() || N1C->isOne()) &&
5070 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5071 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5072 // excluded as they are handled below whilst checking for foldBooleans.
5073 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5074 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5075 (N0.getValueType() == MVT::i1 ||
5079 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5080 if (TrueWhenTrue)
5081 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5082 // Invert the condition.
5083 if (N0.getOpcode() == ISD::SETCC) {
5086 if (DCI.isBeforeLegalizeOps() ||
5088 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5089 }
5090 }
5091
5092 if ((N0.getOpcode() == ISD::XOR ||
5093 (N0.getOpcode() == ISD::AND &&
5094 N0.getOperand(0).getOpcode() == ISD::XOR &&
5095 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5096 isOneConstant(N0.getOperand(1))) {
5097 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5098 // can only do this if the top bits are known zero.
5099 unsigned BitWidth = N0.getValueSizeInBits();
5100 if (DAG.MaskedValueIsZero(N0,
5102 BitWidth-1))) {
5103 // Okay, get the un-inverted input value.
5104 SDValue Val;
5105 if (N0.getOpcode() == ISD::XOR) {
5106 Val = N0.getOperand(0);
5107 } else {
5108 assert(N0.getOpcode() == ISD::AND &&
5109 N0.getOperand(0).getOpcode() == ISD::XOR);
5110 // ((X^1)&1)^1 -> X & 1
5111 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5112 N0.getOperand(0).getOperand(0),
5113 N0.getOperand(1));
5114 }
5115
5116 return DAG.getSetCC(dl, VT, Val, N1,
5118 }
5119 } else if (N1C->isOne()) {
5120 SDValue Op0 = N0;
5121 if (Op0.getOpcode() == ISD::TRUNCATE)
5122 Op0 = Op0.getOperand(0);
5123
5124 if ((Op0.getOpcode() == ISD::XOR) &&
5125 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5126 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5127 SDValue XorLHS = Op0.getOperand(0);
5128 SDValue XorRHS = Op0.getOperand(1);
5129 // Ensure that the input setccs return an i1 type or 0/1 value.
5130 if (Op0.getValueType() == MVT::i1 ||
5135 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5137 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5138 }
5139 }
5140 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5141 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5142 if (Op0.getValueType().bitsGT(VT))
5143 Op0 = DAG.getNode(ISD::AND, dl, VT,
5144 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5145 DAG.getConstant(1, dl, VT));
5146 else if (Op0.getValueType().bitsLT(VT))
5147 Op0 = DAG.getNode(ISD::AND, dl, VT,
5148 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5149 DAG.getConstant(1, dl, VT));
5150
5151 return DAG.getSetCC(dl, VT, Op0,
5152 DAG.getConstant(0, dl, Op0.getValueType()),
5154 }
5155 if (Op0.getOpcode() == ISD::AssertZext &&
5156 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5157 return DAG.getSetCC(dl, VT, Op0,
5158 DAG.getConstant(0, dl, Op0.getValueType()),
5160 }
5161 }
5162
5163 // Given:
5164 // icmp eq/ne (urem %x, %y), 0
5165 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5166 // icmp eq/ne %x, 0
5167 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5168 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5169 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5170 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5171 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5172 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5173 }
5174
5175 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5176 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5177 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5179 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5180 N1C->isAllOnes()) {
5181 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5182 DAG.getConstant(0, dl, OpVT),
5184 }
5185
5186 // fold (setcc (trunc x) c) -> (setcc x c)
5187 if (N0.getOpcode() == ISD::TRUNCATE &&
5189 (N0->getFlags().hasNoSignedWrap() &&
5192 EVT NewVT = N0.getOperand(0).getValueType();
5193 SDValue NewConst = DAG.getConstant(
5195 ? C1.sext(NewVT.getSizeInBits())
5196 : C1.zext(NewVT.getSizeInBits()),
5197 dl, NewVT);
5198 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5199 }
5200
5201 if (SDValue V =
5202 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5203 return V;
5204 }
5205
5206 // These simplifications apply to splat vectors as well.
5207 // TODO: Handle more splat vector cases.
5208 if (auto *N1C = isConstOrConstSplat(N1)) {
5209 const APInt &C1 = N1C->getAPIntValue();
5210
5211 APInt MinVal, MaxVal;
5212 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5214 MinVal = APInt::getSignedMinValue(OperandBitSize);
5215 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5216 } else {
5217 MinVal = APInt::getMinValue(OperandBitSize);
5218 MaxVal = APInt::getMaxValue(OperandBitSize);
5219 }
5220
5221 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5222 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5223 // X >= MIN --> true
5224 if (C1 == MinVal)
5225 return DAG.getBoolConstant(true, dl, VT, OpVT);
5226
5227 if (!VT.isVector()) { // TODO: Support this for vectors.
5228 // X >= C0 --> X > (C0 - 1)
5229 APInt C = C1 - 1;
5231 if ((DCI.isBeforeLegalizeOps() ||
5232 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5233 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5234 isLegalICmpImmediate(C.getSExtValue())))) {
5235 return DAG.getSetCC(dl, VT, N0,
5236 DAG.getConstant(C, dl, N1.getValueType()),
5237 NewCC);
5238 }
5239 }
5240 }
5241
5242 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5243 // X <= MAX --> true
5244 if (C1 == MaxVal)
5245 return DAG.getBoolConstant(true, dl, VT, OpVT);
5246
5247 // X <= C0 --> X < (C0 + 1)
5248 if (!VT.isVector()) { // TODO: Support this for vectors.
5249 APInt C = C1 + 1;
5251 if ((DCI.isBeforeLegalizeOps() ||
5252 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5253 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5254 isLegalICmpImmediate(C.getSExtValue())))) {
5255 return DAG.getSetCC(dl, VT, N0,
5256 DAG.getConstant(C, dl, N1.getValueType()),
5257 NewCC);
5258 }
5259 }
5260 }
5261
5262 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5263 if (C1 == MinVal)
5264 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5265
5266 // TODO: Support this for vectors after legalize ops.
5267 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5268 // Canonicalize setlt X, Max --> setne X, Max
5269 if (C1 == MaxVal)
5270 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5271
5272 // If we have setult X, 1, turn it into seteq X, 0
5273 if (C1 == MinVal+1)
5274 return DAG.getSetCC(dl, VT, N0,
5275 DAG.getConstant(MinVal, dl, N0.getValueType()),
5276 ISD::SETEQ);
5277 }
5278 }
5279
5280 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5281 if (C1 == MaxVal)
5282 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5283
5284 // TODO: Support this for vectors after legalize ops.
5285 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5286 // Canonicalize setgt X, Min --> setne X, Min
5287 if (C1 == MinVal)
5288 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5289
5290 // If we have setugt X, Max-1, turn it into seteq X, Max
5291 if (C1 == MaxVal-1)
5292 return DAG.getSetCC(dl, VT, N0,
5293 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5294 ISD::SETEQ);
5295 }
5296 }
5297
5298 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5299 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5300 if (C1.isZero())
5301 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5302 VT, N0, N1, Cond, DCI, dl))
5303 return CC;
5304
5305 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5306 // For example, when high 32-bits of i64 X are known clear:
5307 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5308 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5309 bool CmpZero = N1C->isZero();
5310 bool CmpNegOne = N1C->isAllOnes();
5311 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5312 // Match or(lo,shl(hi,bw/2)) pattern.
5313 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5314 unsigned EltBits = V.getScalarValueSizeInBits();
5315 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5316 return false;
5317 SDValue LHS = V.getOperand(0);
5318 SDValue RHS = V.getOperand(1);
5319 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5320 // Unshifted element must have zero upperbits.
5321 if (RHS.getOpcode() == ISD::SHL &&
5322 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5323 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5324 DAG.MaskedValueIsZero(LHS, HiBits)) {
5325 Lo = LHS;
5326 Hi = RHS.getOperand(0);
5327 return true;
5328 }
5329 if (LHS.getOpcode() == ISD::SHL &&
5330 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5331 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5332 DAG.MaskedValueIsZero(RHS, HiBits)) {
5333 Lo = RHS;
5334 Hi = LHS.getOperand(0);
5335 return true;
5336 }
5337 return false;
5338 };
5339
5340 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5341 unsigned EltBits = N0.getScalarValueSizeInBits();
5342 unsigned HalfBits = EltBits / 2;
5343 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5344 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5345 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5346 SDValue NewN0 =
5347 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5348 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5349 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5350 };
5351
5352 SDValue Lo, Hi;
5353 if (IsConcat(N0, Lo, Hi))
5354 return MergeConcat(Lo, Hi);
5355
5356 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5357 SDValue Lo0, Lo1, Hi0, Hi1;
5358 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5359 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5360 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5361 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5362 }
5363 }
5364 }
5365 }
5366
5367 // If we have "setcc X, C0", check to see if we can shrink the immediate
5368 // by changing cc.
5369 // TODO: Support this for vectors after legalize ops.
5370 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5371 // SETUGT X, SINTMAX -> SETLT X, 0
5372 // SETUGE X, SINTMIN -> SETLT X, 0
5373 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5374 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5375 return DAG.getSetCC(dl, VT, N0,
5376 DAG.getConstant(0, dl, N1.getValueType()),
5377 ISD::SETLT);
5378
5379 // SETULT X, SINTMIN -> SETGT X, -1
5380 // SETULE X, SINTMAX -> SETGT X, -1
5381 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5382 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5383 return DAG.getSetCC(dl, VT, N0,
5384 DAG.getAllOnesConstant(dl, N1.getValueType()),
5385 ISD::SETGT);
5386 }
5387 }
5388
5389 // Back to non-vector simplifications.
5390 // TODO: Can we do these for vector splats?
5391 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5392 const APInt &C1 = N1C->getAPIntValue();
5393 EVT ShValTy = N0.getValueType();
5394
5395 // Fold bit comparisons when we can. This will result in an
5396 // incorrect value when boolean false is negative one, unless
5397 // the bitsize is 1 in which case the false value is the same
5398 // in practice regardless of the representation.
5399 if ((VT.getSizeInBits() == 1 ||
5401 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5402 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5403 N0.getOpcode() == ISD::AND) {
5404 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5405 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5406 // Perform the xform if the AND RHS is a single bit.
5407 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5408 if (AndRHS->getAPIntValue().isPowerOf2() &&
5409 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5410 return DAG.getNode(
5411 ISD::TRUNCATE, dl, VT,
5412 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5413 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5414 }
5415 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5416 // (X & 8) == 8 --> (X & 8) >> 3
5417 // Perform the xform if C1 is a single bit.
5418 unsigned ShCt = C1.logBase2();
5419 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5420 return DAG.getNode(
5421 ISD::TRUNCATE, dl, VT,
5422 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5423 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5424 }
5425 }
5426 }
5427 }
5428
5429 if (C1.getSignificantBits() <= 64 &&
5431 // (X & -256) == 256 -> (X >> 8) == 1
5432 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5433 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5434 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5435 const APInt &AndRHSC = AndRHS->getAPIntValue();
5436 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5437 unsigned ShiftBits = AndRHSC.countr_zero();
5438 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5439 // If using an unsigned shift doesn't yield a legal compare
5440 // immediate, try using sra instead.
5441 APInt NewC = C1.lshr(ShiftBits);
5442 if (NewC.getSignificantBits() <= 64 &&
5444 APInt SignedC = C1.ashr(ShiftBits);
5445 if (SignedC.getSignificantBits() <= 64 &&
5447 SDValue Shift = DAG.getNode(
5448 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5449 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5450 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5451 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5452 }
5453 }
5454 SDValue Shift = DAG.getNode(
5455 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5456 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5457 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5458 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5459 }
5460 }
5461 }
5462 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5463 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5464 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5465 // X < 0x100000000 -> (X >> 32) < 1
5466 // X >= 0x100000000 -> (X >> 32) >= 1
5467 // X <= 0x0ffffffff -> (X >> 32) < 1
5468 // X > 0x0ffffffff -> (X >> 32) >= 1
5469 unsigned ShiftBits;
5470 APInt NewC = C1;
5471 ISD::CondCode NewCond = Cond;
5472 if (AdjOne) {
5473 ShiftBits = C1.countr_one();
5474 NewC = NewC + 1;
5475 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5476 } else {
5477 ShiftBits = C1.countr_zero();
5478 }
5479 NewC.lshrInPlace(ShiftBits);
5480 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5482 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5483 SDValue Shift =
5484 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5485 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5486 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5487 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5488 }
5489 }
5490 }
5491 }
5492
5494 auto *CFP = cast<ConstantFPSDNode>(N1);
5495 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5496
5497 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5498 // constant if knowing that the operand is non-nan is enough. We prefer to
5499 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5500 // materialize 0.0.
5501 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5502 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5503
5504 // setcc (fneg x), C -> setcc swap(pred) x, -C
5505 if (N0.getOpcode() == ISD::FNEG) {
5507 if (DCI.isBeforeLegalizeOps() ||
5508 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5509 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5510 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5511 }
5512 }
5513
5514 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5516 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5517 bool IsFabs = N0.getOpcode() == ISD::FABS;
5518 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5519 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5520 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5521 : (IsFabs ? fcInf : fcPosInf);
5522 if (Cond == ISD::SETUEQ)
5523 Flag |= fcNan;
5524 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5525 DAG.getTargetConstant(Flag, dl, MVT::i32));
5526 }
5527 }
5528
5529 // If the condition is not legal, see if we can find an equivalent one
5530 // which is legal.
5532 // If the comparison was an awkward floating-point == or != and one of
5533 // the comparison operands is infinity or negative infinity, convert the
5534 // condition to a less-awkward <= or >=.
5535 if (CFP->getValueAPF().isInfinity()) {
5536 bool IsNegInf = CFP->getValueAPF().isNegative();
5538 switch (Cond) {
5539 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5540 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5541 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5542 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5543 default: break;
5544 }
5545 if (NewCond != ISD::SETCC_INVALID &&
5546 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5547 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5548 }
5549 }
5550 }
5551
5552 if (N0 == N1) {
5553 // The sext(setcc()) => setcc() optimization relies on the appropriate
5554 // constant being emitted.
5555 assert(!N0.getValueType().isInteger() &&
5556 "Integer types should be handled by FoldSetCC");
5557
5558 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5559 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5560 if (UOF == 2) // FP operators that are undefined on NaNs.
5561 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5562 if (UOF == unsigned(EqTrue))
5563 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5564 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5565 // if it is not already.
5566 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5567 if (NewCond != Cond &&
5568 (DCI.isBeforeLegalizeOps() ||
5569 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5570 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5571 }
5572
5573 // ~X > ~Y --> Y > X
5574 // ~X < ~Y --> Y < X
5575 // ~X < C --> X > ~C
5576 // ~X > C --> X < ~C
5577 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5578 N0.getValueType().isInteger()) {
5579 if (isBitwiseNot(N0)) {
5580 if (isBitwiseNot(N1))
5581 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5582
5585 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5586 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5587 }
5588 }
5589 }
5590
5591 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5592 N0.getValueType().isInteger()) {
5593 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5594 N0.getOpcode() == ISD::XOR) {
5595 // Simplify (X+Y) == (X+Z) --> Y == Z
5596 if (N0.getOpcode() == N1.getOpcode()) {
5597 if (N0.getOperand(0) == N1.getOperand(0))
5598 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5599 if (N0.getOperand(1) == N1.getOperand(1))
5600 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5601 if (isCommutativeBinOp(N0.getOpcode())) {
5602 // If X op Y == Y op X, try other combinations.
5603 if (N0.getOperand(0) == N1.getOperand(1))
5604 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5605 Cond);
5606 if (N0.getOperand(1) == N1.getOperand(0))
5607 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5608 Cond);
5609 }
5610 }
5611
5612 // If RHS is a legal immediate value for a compare instruction, we need
5613 // to be careful about increasing register pressure needlessly.
5614 bool LegalRHSImm = false;
5615
5616 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5617 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5618 // Turn (X+C1) == C2 --> X == C2-C1
5619 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5620 return DAG.getSetCC(
5621 dl, VT, N0.getOperand(0),
5622 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5623 dl, N0.getValueType()),
5624 Cond);
5625
5626 // Turn (X^C1) == C2 --> X == C1^C2
5627 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5628 return DAG.getSetCC(
5629 dl, VT, N0.getOperand(0),
5630 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5631 dl, N0.getValueType()),
5632 Cond);
5633 }
5634
5635 // Turn (C1-X) == C2 --> X == C1-C2
5636 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5637 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5638 return DAG.getSetCC(
5639 dl, VT, N0.getOperand(1),
5640 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5641 dl, N0.getValueType()),
5642 Cond);
5643
5644 // Could RHSC fold directly into a compare?
5645 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5646 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5647 }
5648
5649 // (X+Y) == X --> Y == 0 and similar folds.
5650 // Don't do this if X is an immediate that can fold into a cmp
5651 // instruction and X+Y has other uses. It could be an induction variable
5652 // chain, and the transform would increase register pressure.
5653 if (!LegalRHSImm || N0.hasOneUse())
5654 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5655 return V;
5656 }
5657
5658 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5659 N1.getOpcode() == ISD::XOR)
5660 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5661 return V;
5662
5663 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5664 return V;
5665
5666 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5667 return V;
5668 }
5669
5670 // Fold remainder of division by a constant.
5671 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5672 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5673 // When division is cheap or optimizing for minimum size,
5674 // fall through to DIVREM creation by skipping this fold.
5675 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5676 if (N0.getOpcode() == ISD::UREM) {
5677 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5678 return Folded;
5679 } else if (N0.getOpcode() == ISD::SREM) {
5680 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5681 return Folded;
5682 }
5683 }
5684 }
5685
5686 // Fold away ALL boolean setcc's.
5687 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5688 SDValue Temp;
5689 switch (Cond) {
5690 default: llvm_unreachable("Unknown integer setcc!");
5691 case ISD::SETEQ: // X == Y -> ~(X^Y)
5692 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5693 N0 = DAG.getNOT(dl, Temp, OpVT);
5694 if (!DCI.isCalledByLegalizer())
5695 DCI.AddToWorklist(Temp.getNode());
5696 break;
5697 case ISD::SETNE: // X != Y --> (X^Y)
5698 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5699 break;
5700 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5701 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5702 Temp = DAG.getNOT(dl, N0, OpVT);
5703 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5704 if (!DCI.isCalledByLegalizer())
5705 DCI.AddToWorklist(Temp.getNode());
5706 break;
5707 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5708 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5709 Temp = DAG.getNOT(dl, N1, OpVT);
5710 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5711 if (!DCI.isCalledByLegalizer())
5712 DCI.AddToWorklist(Temp.getNode());
5713 break;
5714 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5715 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5716 Temp = DAG.getNOT(dl, N0, OpVT);
5717 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5718 if (!DCI.isCalledByLegalizer())
5719 DCI.AddToWorklist(Temp.getNode());
5720 break;
5721 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5722 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5723 Temp = DAG.getNOT(dl, N1, OpVT);
5724 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5725 break;
5726 }
5727 if (VT.getScalarType() != MVT::i1) {
5728 if (!DCI.isCalledByLegalizer())
5729 DCI.AddToWorklist(N0.getNode());
5730 // FIXME: If running after legalize, we probably can't do this.
5732 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5733 }
5734 return N0;
5735 }
5736
5737 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5738 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5739 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5741 N1->getFlags().hasNoUnsignedWrap()) ||
5743 N1->getFlags().hasNoSignedWrap())) &&
5745 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5746 }
5747
5748 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5749 // TODO: Remove that .isVector() check
5750 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5752 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5753 }
5754
5755 // Could not fold it.
5756 return SDValue();
5757}
5758
5759/// Returns true (and the GlobalValue and the offset) if the node is a
5760/// GlobalAddress + offset.
///
/// The offset is accumulated into Offset with `+=` and is never assigned, so
/// the value Offset holds on entry is part of the result. When this returns
/// false, GA and Offset may already have been updated by a partially
/// successful match (a global was found but the other add operand was not a
/// constant), so neither should be relied upon in that case.
5762 int64_t &Offset) const {
5763
 // All checks below look at the node returned by unwrapAddress(), not at WN
 // itself.
5764 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5765
 // Base case: a plain global address contributes its global and its own
 // offset.
5766 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5767 GA = GASD->getGlobal();
5768 Offset += GASD->getOffset();
5769 return true;
5770 }
5771
 // Recursive case: one operand of the add must itself be GA + offset and the
 // other must be a constant. The second operand is only tried as the global
 // side when the first operand did not match at all.
5772 if (N->isAnyAdd()) {
5773 SDValue N1 = N->getOperand(0);
5774 SDValue N2 = N->getOperand(1);
5775 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5776 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5777 Offset += V->getSExtValue();
5778 return true;
5779 }
5780 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5781 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5782 Offset += V->getSExtValue();
5783 return true;
5784 }
5785 }
5786 }
5787
5788 return false;
5789}
5790
5792 DAGCombinerInfo &DCI) const {
5793 // Default implementation: performs no optimization and always returns an
 // empty SDValue.
5794 return SDValue();
5795}
5796
5797//===----------------------------------------------------------------------===//
5798// Inline Assembler Implementation Methods
5799//===----------------------------------------------------------------------===//
5800
5803 unsigned S = Constraint.size();
5804
 // Single-letter constraints are classified by the letter alone. Letters that
 // are not listed fall out of the switch and end up as C_Unknown below.
5805 if (S == 1) {
5806 switch (Constraint[0]) {
5807 default: break;
5808 case 'r':
5809 return C_RegisterClass;
5810 case 'm': // memory
5811 case 'o': // offsettable
5812 case 'V': // not offsettable
5813 return C_Memory;
5814 case 'p': // Address.
5815 return C_Address;
5816 case 'n': // Simple Integer
5817 case 'E': // Floating Point Constant
5818 case 'F': // Floating Point Constant
5819 return C_Immediate;
5820 case 'i': // Simple Integer or Relocatable Constant
5821 case 's': // Relocatable Constant
5822 case 'X': // Allow ANY value.
5823 case 'I': // Target registers.
5824 case 'J':
5825 case 'K':
5826 case 'L':
5827 case 'M':
5828 case 'N':
5829 case 'O':
5830 case 'P':
5831 case '<':
5832 case '>':
5833 return C_Other;
5834 }
5835 }
5836
 // A brace-enclosed constraint names a specific register, except for the
 // literal "{memory}", which is classified as memory.
5837 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5838 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5839 return C_Memory;
5840 return C_Register;
5841 }
 // Everything else (including the empty string and multi-character codes
 // that are not brace-enclosed) is not recognized here.
5842 return C_Unknown;
5843}
5844
5845/// Try to replace an X constraint, which matches anything, with another that
5846/// has more specific requirements based on the type of the corresponding
5847/// operand.
5848const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5849 if (ConstraintVT.isInteger())
5850 return "r";
5851 if (ConstraintVT.isFloatingPoint())
5852 return "f"; // works for many targets
5853 return nullptr;
5854}
5855
5857 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5858 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
 // Base implementation: ignores every argument, leaves Chain and Glue
 // untouched, and returns an empty SDValue.
5859 return SDValue();
5860}
5861
5862/// Lower the specified operand into the Ops vector.
5863/// If it is invalid, don't add anything to Ops.
///
/// Only the single-letter constraints 'X', 'i', 'n' and 's' are handled here;
/// any other letter, and any constraint longer than one character, leaves Ops
/// unchanged.
5865 StringRef Constraint,
5866 std::vector<SDValue> &Ops,
5867 SelectionDAG &DAG) const {
5868
5869 if (Constraint.size() > 1)
5870 return;
5871
 // NOTE(review): an empty Constraint is not rejected by the size check above,
 // so the index below assumes callers never pass one - confirm.
5872 char ConstraintLetter = Constraint[0];
5873 switch (ConstraintLetter) {
5874 default: break;
5875 case 'X': // Allows any operand
5876 case 'i': // Simple Integer or Relocatable Constant
5877 case 'n': // Simple Integer
5878 case 's': { // Relocatable Constant
5879
 // Running sum of the constant addends peeled off while walking towards the
 // base operand.
5881 uint64_t Offset = 0;
5882
5883 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5884 // etc., since getelementptr is variadic. We can't use
5885 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5886 // while in this case the GA may be furthest from the root node which is
5887 // likely an ISD::ADD.
5888 while (true) {
 // A plain constant is accepted for every letter except 's'.
5889 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5890 // gcc prints these as sign extended. Sign extend value to 64 bits
5891 // now; without this it would get ZExt'd later in
5892 // ScheduleDAGSDNodes::EmitNode, which is very generic.
 // 1-bit constants are extended according to the target's boolean contents
 // for i64 instead of being unconditionally sign extended.
5893 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5894 BooleanContent BCont = getBooleanContents(MVT::i64);
5895 ISD::NodeType ExtOpc =
5896 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5897 int64_t ExtVal =
5898 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5899 Ops.push_back(
5900 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5901 return;
5902 }
 // Symbolic operands are accepted for every letter except 'n'.
5903 if (ConstraintLetter != 'n') {
5904 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5905 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5906 GA->getValueType(0),
5907 Offset + GA->getOffset()));
5908 return;
5909 }
5910 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5911 Ops.push_back(DAG.getTargetBlockAddress(
5912 BA->getBlockAddress(), BA->getValueType(0),
5913 Offset + BA->getOffset(), BA->getTargetFlags()));
5914 return;
5915 }
5917 Ops.push_back(Op);
5918 return;
5919 }
5920 }
 // Otherwise peel one constant operand off an ADD/SUB, fold it into Offset,
 // and retry the matches above on the remaining operand.
 // NOTE(review): for ISD::SUB only a constant in operand 0 (the minuend) is
 // accepted, and that constant is then subtracted from Offset - confirm this
 // is the intended handling of the "(GA-C)" form named above.
5921 const unsigned OpCode = Op.getOpcode();
5922 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5923 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5924 Op = Op.getOperand(1);
5925 // Subtraction is not commutative.
5926 else if (OpCode == ISD::ADD &&
5927 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5928 Op = Op.getOperand(0);
5929 else
5930 return;
5931 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5932 continue;
5933 }
 // Any other node kind cannot be lowered here; leave Ops untouched.
5934 return;
5935 }
5936 break;
5937 }
5938 }
5939}
5940
5944
// Resolve a brace-enclosed register constraint such as "{name}" to a physical
// register and a register class containing it. Returns (0, nullptr) when the
// constraint does not start with '{' or when no legal register class has a
// register whose assembly name matches (compared case-insensitively).
5945std::pair<unsigned, const TargetRegisterClass *>
5947 StringRef Constraint,
5948 MVT VT) const {
5949 if (!Constraint.starts_with("{"))
5950 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
 // NOTE(review): a constraint consisting only of "{" trips this assert; with
 // asserts disabled the size() - 2 below would wrap around - confirm callers
 // never pass it.
5951 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5952
5953 // Remove the braces from around the name.
5954 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5955
 // Fallback result: the first name match found in a class that does not list
 // VT as a legal type.
5956 std::pair<unsigned, const TargetRegisterClass *> R =
5957 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5958
5959 // Figure out which register class contains this reg.
5960 for (const TargetRegisterClass *RC : RI->regclasses()) {
5961 // If none of the value types for this register class are valid, we
5962 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5963 if (!isLegalRC(*RI, *RC))
5964 continue;
5965
5966 for (const MCPhysReg &PR : *RC) {
5967 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5968 std::pair<unsigned, const TargetRegisterClass *> S =
5969 std::make_pair(PR, RC);
5970
5971 // If this register class has the requested value type, return it,
5972 // otherwise keep searching and return the first class found
5973 // if no other is found which explicitly has the requested type.
5974 if (RI->isTypeLegalForClass(*RC, VT))
5975 return S;
5976 if (!R.second)
5977 R = S;
5978 }
5979 }
5980 }
5981
5982 return R;
5983}
5984
5985//===----------------------------------------------------------------------===//
5986// Constraint Selection.
5987
5988/// Return true if this is an input operand that is a matching constraint like
5989/// "4".
///
/// Only the first character of ConstraintCode is examined; the code must not
/// be empty.
5991 assert(!ConstraintCode.empty() && "No known constraint!");
 // The cast to unsigned char keeps the isdigit() argument in its valid range
 // when char is signed.
5992 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5993}
5994
5995/// If this is an input matching constraint, this method returns the output
5996/// operand it matches.
///
/// The constraint code is parsed with atoi(), so the result is the value of its
/// leading decimal digits, or 0 when it does not start with a number. The code
/// must not be empty.
5998 assert(!ConstraintCode.empty() && "No known constraint!");
5999 return atoi(ConstraintCode.c_str());
6000}
6001
6002/// Split up the constraint string from the inline assembly value into the
6003/// specific constraints and their prefixes, and also tie in the associated
6004/// operand values.
6005/// If this returns an empty vector, and if the constraint string itself
6006/// isn't empty, there was an error parsing.
///
/// The work is done in three passes: build one AsmOperandInfo per parsed
/// constraint and compute its value type, pick the best-scoring alternative
/// when multiple-alternative constraints are present, and finally validate
/// operands that are tied to a matching input.
6009 const TargetRegisterInfo *TRI,
6010 const CallBase &Call) const {
6011 /// Information about all of the constraints.
6012 AsmOperandInfoVector ConstraintOperands;
6013 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6014 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6015
6016 // Do a prepass over the constraints, canonicalizing them, and building up the
6017 // ConstraintOperands list.
6018 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6019 unsigned ResNo = 0; // ResNo - The result number of the next output.
6020 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6021
6022 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6023 ConstraintOperands.emplace_back(std::move(CI));
6024 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6025
6026 // Update multiple alternative constraint count.
6027 if (OpInfo.multipleAlternatives.size() > maCount)
6028 maCount = OpInfo.multipleAlternatives.size();
6029
 // Default value type; overwritten below when a concrete type is known.
6030 OpInfo.ConstraintVT = MVT::Other;
6031
6032 // Compute the value type for each operand.
6033 switch (OpInfo.Type) {
6034 case InlineAsm::isOutput: {
6035 // Indirect outputs just consume an argument.
6036 if (OpInfo.isIndirect) {
6037 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6038 break;
6039 }
6040
6041 // The return value of the call is this value. As such, there is no
6042 // corresponding argument.
6043 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6044 EVT VT;
6045 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6046 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6047 } else {
6048 assert(ResNo == 0 && "Asm only has one result!");
6049 VT = getAsmOperandValueType(DL, Call.getType());
6050 }
6051 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6052 ++ResNo;
6053 break;
6054 }
6055 case InlineAsm::isInput:
6056 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6057 break;
6058 case InlineAsm::isLabel:
 // Labels take their value from the callbr indirect destinations. The
 // 'continue' skips the argument handling below, so ArgNo is not advanced
 // and ConstraintVT stays MVT::Other.
6059 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6060 ++LabelNo;
6061 continue;
6063 // Nothing to do.
6064 break;
6065 }
6066
 // Operands backed by a call argument: derive the value type from the
 // argument's IR type and consume the argument.
6067 if (OpInfo.CallOperandVal) {
6068 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6069 if (OpInfo.isIndirect) {
6070 OpTy = Call.getParamElementType(ArgNo);
6071 assert(OpTy && "Indirect operand must have elementtype attribute");
6072 }
6073
6074 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6075 if (StructType *STy = dyn_cast<StructType>(OpTy))
6076 if (STy->getNumElements() == 1)
6077 OpTy = STy->getElementType(0);
6078
6079 // If OpTy is not a single value, it may be a struct/union that we
6080 // can tile with integers.
6081 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6082 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
 // Only these exact bit sizes are replaced by an integer type; any other
 // size keeps the original aggregate type.
6083 switch (BitSize) {
6084 default: break;
6085 case 1:
6086 case 8:
6087 case 16:
6088 case 32:
6089 case 64:
6090 case 128:
6091 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6092 break;
6093 }
6094 }
6095
6096 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6097 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6098 ArgNo++;
6099 }
6100 }
6101
6102 // If we have multiple alternative constraints, select the best alternative.
6103 if (!ConstraintOperands.empty()) {
6104 if (maCount) {
 // bestWeight starts at -1 and is only replaced by a strictly larger sum,
 // so ties keep the earliest alternative, and alternative 0 is selected
 // when every alternative scores -1.
6105 unsigned bestMAIndex = 0;
6106 int bestWeight = -1;
6107 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6108 int weight = -1;
6109 unsigned maIndex;
6110 // Compute the sums of the weights for each alternative, keeping track
6111 // of the best (highest weight) one so far.
6112 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6113 int weightSum = 0;
6114 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6115 cIndex != eIndex; ++cIndex) {
6116 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
 // Clobbers do not take part in the scoring.
6117 if (OpInfo.Type == InlineAsm::isClobber)
6118 continue;
6119
6120 // If this is an output operand with a matching input operand,
6121 // look up the matching input. If their types mismatch, e.g. one
6122 // is an integer, the other is floating point, or their sizes are
6123 // different, flag it as an maCantMatch.
6124 if (OpInfo.hasMatchingInput()) {
6125 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6126 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6127 if ((OpInfo.ConstraintVT.isInteger() !=
6128 Input.ConstraintVT.isInteger()) ||
6129 (OpInfo.ConstraintVT.getSizeInBits() !=
6130 Input.ConstraintVT.getSizeInBits())) {
6131 weightSum = -1; // Can't match.
6132 break;
6133 }
6134 }
6135 }
 // A single operand that cannot match rules out the whole alternative.
6136 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6137 if (weight == -1) {
6138 weightSum = -1;
6139 break;
6140 }
6141 weightSum += weight;
6142 }
6143 // Update best.
6144 if (weightSum > bestWeight) {
6145 bestWeight = weightSum;
6146 bestMAIndex = maIndex;
6147 }
6148 }
6149
6150 // Now select chosen alternative in each constraint.
6151 for (AsmOperandInfo &cInfo : ConstraintOperands)
6152 if (cInfo.Type != InlineAsm::isClobber)
6153 cInfo.selectAlternative(bestMAIndex);
6154 }
6155 }
6156
6157 // Check and hook up tied operands, choose constraint code to use.
6158 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6159 cIndex != eIndex; ++cIndex) {
6160 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6161
6162 // If this is an output operand with a matching input operand, look up the
6163 // matching input. If their types mismatch, e.g. one is an integer, the
6164 // other is floating point, or their sizes are different, flag it as an
6165 // error.
6166 if (OpInfo.hasMatchingInput()) {
6167 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6168
6169 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
 // Differing value types are tolerated only when both operands resolve to
 // the same register class and agree on being (or not being) a scalar
 // integer/floating-point type.
6170 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6171 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6172 OpInfo.ConstraintVT);
6173 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6174 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6175 Input.ConstraintVT);
6176 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6177 OpInfo.ConstraintVT.isFloatingPoint();
6178 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6179 Input.ConstraintVT.isFloatingPoint();
6180 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6181 (MatchRC.second != InputRC.second)) {
6182 report_fatal_error("Unsupported asm: input constraint"
6183 " with a matching output constraint of"
6184 " incompatible type!");
6185 }
6186 }
6187 }
6188 }
6189
6190 return ConstraintOperands;
6191}
6192
6193/// Return a number indicating our preference for choosing a type of constraint
6194/// over another, for the purpose of sorting them. Immediates are almost always
6195/// preferable (when they can be emitted). A higher return value means a
6196/// stronger preference for one constraint type relative to another.
6197/// FIXME: We should prefer registers over memory but doing so may lead to
6198/// unrecoverable register exhaustion later.
6199/// https://github.com/llvm/llvm-project/issues/20571
6201 switch (CT) {
 // Priorities run from 4 (most preferred) down to 0 (least preferred).
6204 return 4;
6207 return 3;
6209 return 2;
6211 return 1;
6213 return 0;
6214 }
 // Reached only when CT is not handled by any case of the switch above.
6215 llvm_unreachable("Invalid constraint type");
6216}
6217
6218/// Examine constraint type and operand type and determine a weight value.
6219/// This object must already have been set up with the operand type
6220/// and the current alternative constraint selected.
///
/// Returns the highest weight among the constraint codes of alternative
/// maIndex, or CW_Invalid when that alternative has no codes.
6223 AsmOperandInfo &info, int maIndex) const {
 // An index past the operand's last alternative falls back to the operand's
 // own code list.
6225 if (maIndex >= (int)info.multipleAlternatives.size())
6226 rCodes = &info.Codes;
6227 else
6228 rCodes = &info.multipleAlternatives[maIndex].Codes;
6229 ConstraintWeight BestWeight = CW_Invalid;
6230
6231 // Loop over the options, keeping track of the highest weight seen.
6232 for (const std::string &rCode : *rCodes) {
6233 ConstraintWeight weight =
6234 getSingleConstraintMatchWeight(info, rCode.c_str());
6235 if (weight > BestWeight)
6236 BestWeight = weight;
6237 }
6238
6239 return BestWeight;
6240}
6241
6242/// Examine constraint type and operand type and determine a weight value.
6243/// This object must already have been set up with the operand type
6244/// and the current alternative constraint selected.
///
/// Only the first character of constraint is examined.
6247 AsmOperandInfo &info, const char *constraint) const {
6249 Value *CallOperandVal = info.CallOperandVal;
6250 // If we don't have a value, we can't do a match,
6251 // but allow it at the lowest weight.
6252 if (!CallOperandVal)
6253 return CW_Default;
6254 // Look at the constraint type.
 // Cases whose operand check fails leave 'weight' at its initial value
 // (declared outside the visible lines - presumably CW_Invalid; confirm).
6255 switch (*constraint) {
6256 case 'i': // immediate integer.
6257 case 'n': // immediate integer with a known value.
6258 if (isa<ConstantInt>(CallOperandVal))
6259 weight = CW_Constant;
6260 break;
6261 case 's': // non-explicit integral immediate.
6262 if (isa<GlobalValue>(CallOperandVal))
6263 weight = CW_Constant;
6264 break;
6265 case 'E': // immediate float if host format.
6266 case 'F': // immediate float.
6267 if (isa<ConstantFP>(CallOperandVal))
6268 weight = CW_Constant;
6269 break;
6270 case '<': // memory operand with autodecrement.
6271 case '>': // memory operand with autoincrement.
6272 case 'm': // memory operand.
6273 case 'o': // offsettable memory operand
6274 case 'V': // non-offsettable memory operand
6275 weight = CW_Memory;
6276 break;
6277 case 'r': // general register.
6278 case 'g': // general register, memory operand or immediate integer.
6279 // note: Clang converts "g" to "imr".
6280 if (CallOperandVal->getType()->isIntegerTy())
6281 weight = CW_Register;
6282 break;
6283 case 'X': // any operand.
6284 default:
6285 weight = CW_Default;
6286 break;
6287 }
6288 return weight;
6289}
6290
6291/// If there are multiple different constraints that we could pick for this
6292/// operand (e.g. "imr") try to pick the 'best' one.
6293/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6294/// into seven classes:
6295/// Register -> one specific register
6296/// RegisterClass -> a group of regs
6297/// Memory -> memory
6298/// Address -> a symbolic memory reference
6299/// Immediate -> immediate values
6300/// Other -> magic values (such as "Flag Output Operands")
6301/// Unknown -> something we don't recognize yet and can't handle
6302/// Ideally, we would pick the most specific constraint possible: if we have
6303/// something that fits into a register, we would pick it. The problem here
6304/// is that if we have something that could either be in a register or in
6305/// memory that use of the register could cause selection of *other*
6306/// operands to fail: they might only succeed if we pick memory. Because of
6307/// this the heuristic we use is:
6308///
6309/// 1) If there is an 'other' constraint, and if the operand is valid for
6310/// that constraint, use it. This makes us take advantage of 'i'
6311/// constraints when available.
6312/// 2) Otherwise, pick the most general constraint present. This prefers
6313/// 'm' over 'r', for example.
6314///
     /// The returned group holds (constraint code, constraint type) pairs for the
     /// codes of \p OpInfo that were not filtered out, ordered by the comparator
     /// at the end of the function.
6316 TargetLowering::AsmOperandInfo &OpInfo) const {
6317 ConstraintGroup Ret;
6318
     // At most one entry is added per constraint code.
6319 Ret.reserve(OpInfo.Codes.size());
6320 for (StringRef Code : OpInfo.Codes) {
6322
6323 // Indirect 'other' or 'immediate' constraints are not allowed.
6324 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6325 CType == TargetLowering::C_Register ||
6327 continue;
6328
6329 // Things with matching constraints can only be registers, per gcc
6330 // documentation. This mainly affects "g" constraints.
6331 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6332 continue;
6333
6334 Ret.emplace_back(Code, CType);
6335 }
6336
     // The comparator places entries with a higher getConstraintPiority() value
     // first.
6338 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6339 });
6340
6341 return Ret;
6342}
6343
6344/// If we have an immediate, see if we can lower it. Return true if we can,
6345/// false otherwise.
     ///
     /// \p P must be a C_Other or C_Immediate constraint (asserted below). The
     /// lowered operands themselves are discarded; only whether lowering produced
     /// any is reported.
6347 SDValue Op, SelectionDAG *DAG,
6348 const TargetLowering &TLI) {
6349
6350 assert((P.second == TargetLowering::C_Other ||
6351 P.second == TargetLowering::C_Immediate) &&
6352 "need immediate or other");
6353
     // Without an operand node there is nothing to lower.
6354 if (!Op.getNode())
6355 return false;
6356
     // Lowering succeeded if the target produced at least one result operand.
6357 std::vector<SDValue> ResultOps;
6358 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6359 return !ResultOps.empty();
6360}
6361
6362/// Determines the constraint code and constraint type to use for the specific
6363/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
     ///
     /// With several candidate codes, the preference group G is scanned from the
     /// front. If G is empty, OpInfo is left untouched. An "X" constraint is
     /// afterwards refined based on the call operand, when one is available.
6365 SDValue Op,
6366 SelectionDAG *DAG) const {
6367 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6368
6369 // Single-letter constraints ('r') are very common.
6370 if (OpInfo.Codes.size() == 1) {
6371 OpInfo.ConstraintCode = OpInfo.Codes[0];
6372 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6373 } else {
6375 if (G.empty())
6376 return;
6377
     // Walk the leading C_Other / C_Immediate entries of G and stop at the
     // first one the operand can actually be lowered for. If the scan reaches
     // an entry of a different type, that entry is the one selected.
6378 unsigned BestIdx = 0;
6379 for (const unsigned E = G.size();
6380 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6381 G[BestIdx].second == TargetLowering::C_Immediate);
6382 ++BestIdx) {
6383 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6384 break;
6385 // If we're out of constraints, just pick the first one.
6386 if (BestIdx + 1 == E) {
6387 BestIdx = 0;
6388 break;
6389 }
6390 }
6391
6392 OpInfo.ConstraintCode = G[BestIdx].first;
6393 OpInfo.ConstraintType = G[BestIdx].second;
6394 }
6395
6396 // 'X' matches anything.
6397 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6398 // Constants are handled elsewhere. For Functions, the type here is the
6399 // type of the result, which is not what we want to look at; leave them
6400 // alone.
6401 Value *v = OpInfo.CallOperandVal;
6402 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6403 return;
6404 }
6405
     // Basic blocks and block addresses are switched to the "i" constraint
     // code. Note that only the code is updated here, not ConstraintType.
6406 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6407 OpInfo.ConstraintCode = "i";
6408 return;
6409 }
6410
6411 // Otherwise, try to resolve it to something we know about by looking at
6412 // the actual operand type.
6413 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6414 OpInfo.ConstraintCode = Repl;
6415 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6416 }
6417 }
6418}
6419
6420/// Given an exact SDIV by a constant, create a multiplication
6421/// with the multiplicative inverse of the constant.
6422/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
     ///
     /// Returns an empty SDValue if the divisor operand does not satisfy
     /// matchUnaryPredicate with the per-constant check below (e.g. a zero
     /// divisor). The intermediate SRA node, when one is needed, is appended to
     /// \p Created.
6424 const SDLoc &dl, SelectionDAG &DAG,
6425 SmallVectorImpl<SDNode *> &Created) {
6426 SDValue Op0 = N->getOperand(0);
6427 SDValue Op1 = N->getOperand(1);
6428 EVT VT = N->getValueType(0);
6429 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6430 EVT ShSVT = ShVT.getScalarType();
6431
6432 bool UseSRA = false;
6433 SmallVector<SDValue, 16> Shifts, Factors;
6434
     // For each constant divisor, record the number of trailing zero bits as a
     // shift amount and the multiplicative inverse of the remaining divisor as
     // the factor.
6435 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6436 if (C->isZero())
6437 return false;
6438
6439 EVT CT = C->getValueType(0);
6440 APInt Divisor = C->getAPIntValue();
6441 unsigned Shift = Divisor.countr_zero();
6442 if (Shift) {
     // Divide out the power-of-two part; it is applied separately as an
     // arithmetic shift right.
6443 Divisor.ashrInPlace(Shift);
6444 UseSRA = true;
6445 }
6446 APInt Factor = Divisor.multiplicativeInverse();
6447 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6448 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6449 return true;
6450 };
6451
6452 // Collect all magic values from the build vector.
6453 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6454 return SDValue();
6455
     // Rebuild the shift and factor operands in the same form as the divisor:
     // build vector, splat vector, or plain scalar constant.
6456 SDValue Shift, Factor;
6457 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6458 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6459 Factor = DAG.getBuildVector(VT, dl, Factors);
6460 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6461 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6462 "Expected matchUnaryPredicate to return one element for scalable "
6463 "vectors");
6464 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6465 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6466 } else {
6467 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6468 Shift = Shifts[0];
6469 Factor = Factors[0];
6470 }
6471
     // The shift is only emitted if at least one divisor had trailing zeros; it
     // is flagged exact.
6472 SDValue Res = Op0;
6473 if (UseSRA) {
6474 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6475 Created.push_back(Res.getNode());
6476 }
6477
6478 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6479}
6480
6481/// Given an exact UDIV by a constant, create a multiplication
6482/// with the multiplicative inverse of the constant.
6483/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
     ///
     /// Returns an empty SDValue if the divisor operand does not satisfy
     /// matchUnaryPredicate with the per-constant check below (e.g. a zero
     /// divisor). The intermediate SRL node, when one is needed, is appended to
     /// \p Created.
6485 const SDLoc &dl, SelectionDAG &DAG,
6486 SmallVectorImpl<SDNode *> &Created) {
6487 EVT VT = N->getValueType(0);
6488 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6489 EVT ShSVT = ShVT.getScalarType();
6490
6491 bool UseSRL = false;
6492 SmallVector<SDValue, 16> Shifts, Factors;
6493
     // For each constant divisor, record the number of trailing zero bits as a
     // shift amount and the multiplicative inverse of the remaining divisor as
     // the factor.
6494 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6495 if (C->isZero())
6496 return false;
6497
6498 EVT CT = C->getValueType(0);
6499 APInt Divisor = C->getAPIntValue();
6500 unsigned Shift = Divisor.countr_zero();
6501 if (Shift) {
     // Divide out the power-of-two part; it is applied separately as a
     // logical shift right.
6502 Divisor.lshrInPlace(Shift);
6503 UseSRL = true;
6504 }
6505 // Calculate the multiplicative inverse modulo 2^BW, where BW is the bit
     // width of the divisor.
6506 APInt Factor = Divisor.multiplicativeInverse();
6507 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6508 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6509 return true;
6510 };
6511
6512 SDValue Op1 = N->getOperand(1);
6513
6514 // Collect all magic values from the build vector.
6515 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6516 return SDValue();
6517
     // Rebuild the shift and factor operands in the same form as the divisor:
     // build vector, splat vector, or plain scalar constant.
6518 SDValue Shift, Factor;
6519 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6520 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6521 Factor = DAG.getBuildVector(VT, dl, Factors);
6522 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6523 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6524 "Expected matchUnaryPredicate to return one element for scalable "
6525 "vectors");
6526 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6527 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6528 } else {
6529 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6530 Shift = Shifts[0];
6531 Factor = Factors[0];
6532 }
6533
     // The shift is only emitted if at least one divisor had trailing zeros; it
     // is flagged exact.
6534 SDValue Res = N->getOperand(0);
6535 if (UseSRL) {
6536 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6537 Created.push_back(Res.getNode());
6538 }
6539
6540 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6541}
6542
6544 SelectionDAG &DAG,
6545 SmallVectorImpl<SDNode *> &Created) const {
     // When integer division is cheap for this value type (given the function's
     // attributes), hand back the node itself so the SDIV is kept as is.
     // Otherwise return an empty SDValue. Nothing is appended to Created.
6546 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6547 if (isIntDivCheap(N->getValueType(0), Attr))
6548 return SDValue(N, 0); // Lower SDIV as SDIV
6549 return SDValue();
6550}
6551
6552SDValue
6554 SelectionDAG &DAG,
6555 SmallVectorImpl<SDNode *> &Created) const {
     // When integer division is cheap for this value type (given the function's
     // attributes), hand back the node itself so the SREM is kept as is.
     // Otherwise return an empty SDValue. Nothing is appended to Created.
6556 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6557 if (isIntDivCheap(N->getValueType(0), Attr))
6558 return SDValue(N, 0); // Lower SREM as SREM
6559 return SDValue();
6560}
6561
6562/// Build sdiv by power-of-2 with conditional move instructions
6563/// Ref: "Hacker's Delight" by Henry Warren 10-1
6564/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6565/// bgez x, label
6566/// add x, x, 2**k-1
6567/// label:
6568/// sra res, x, k
6569/// neg res, res (when the divisor is negative)
     ///
     /// The compare, add and select nodes are always appended to \p Created; the
     /// SRA node is appended only when a final negation is emitted on top of it.
6571 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6572 SmallVectorImpl<SDNode *> &Created) const {
     // k is taken from the number of trailing zero bits of the divisor.
6573 unsigned Lg2 = Divisor.countr_zero();
6574 EVT VT = N->getValueType(0);
6575
6576 SDLoc DL(N);
6577 SDValue N0 = N->getOperand(0);
6578 SDValue Zero = DAG.getConstant(0, DL, VT);
     // A mask with the low Lg2 bits set, i.e. 2**Lg2 - 1.
6579 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6580 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6581
6582 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6583 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6584 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6585 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6586 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6587
6588 Created.push_back(Cmp.getNode());
6589 Created.push_back(Add.getNode());
6590 Created.push_back(CMov.getNode());
6591
6592 // Divide by pow2.
6593 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6594 DAG.getShiftAmountConstant(Lg2, VT, DL));
6595
6596 // If we're dividing by a positive value, we're done. Otherwise, we must
6597 // negate the result.
6598 if (Divisor.isNonNegative())
6599 return SRA;
6600
     // The negation is expressed as (0 - SRA).
6601 Created.push_back(SRA.getNode());
6602 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6603}
6604
6605/// Given an ISD::SDIV node expressing a divide by constant,
6606/// return a DAG expression to select that will generate the same value by
6607/// multiplying by a magic number.
6608/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
     ///
     /// Returns an empty SDValue when the transform is not applied. Intermediate
     /// nodes built along the way are appended to \p Created; the final ADD node
     /// is returned without being appended.
6610 bool IsAfterLegalization,
6611 bool IsAfterLegalTypes,
6612 SmallVectorImpl<SDNode *> &Created) const {
6613 SDLoc dl(N);
6614
6615 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6616 if (N->getFlags().hasExact())
6617 return BuildExactSDIV(*this, N, dl, DAG, Created);
6618
6619 EVT VT = N->getValueType(0);
6620 EVT SVT = VT.getScalarType();
6621 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6622 EVT ShSVT = ShVT.getScalarType();
6623 unsigned EltBits = VT.getScalarSizeInBits();
     // MulVT stays a default-constructed EVT unless a wider multiply type is
     // chosen below; the code tests this with `MulVT == EVT()`.
6624 EVT MulVT;
6625
6626 // Check to see if we can do this.
6627 // FIXME: We should be more aggressive here.
6628 EVT QueryVT = VT;
6629 if (VT.isVector()) {
6630 // If the vector type will be legalized to a vector type with the same
6631 // element type, allow the transform before type legalization if MULHS or
6632 // SMUL_LOHI are supported.
6633 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6634 if (!QueryVT.isVector() ||
6636 return SDValue();
6637 } else if (!isTypeLegal(VT)) {
6638 // Limit this to simple scalars for now.
6639 if (!VT.isSimple())
6640 return SDValue();
6641
6642 // If this type will be promoted to a large enough type with a legal
6643 // multiply operation, we can go ahead and do this transform.
6645 return SDValue();
6646
6647 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6648 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6649 !isOperationLegal(ISD::MUL, MulVT))
6650 return SDValue();
6651 }
6652
6653 bool HasMULHS =
6654 isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
6655 bool HasSMUL_LOHI =
6656 isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);
6657
6658 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6659 // If the type twice as wide is legal, widen and use a mul plus a shift.
6660 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6661 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6662 // custom lowered. This is very expensive so avoid it at all costs for
6663 // constant divisors.
6664 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6667 MulVT = WideVT;
6668 }
6669
     // No high-multiply operation and no usable wider multiply type: give up.
6670 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6671 return SDValue();
6672
6673 // If we're after type legalization and SVT is not legal, use the
6674 // promoted type for creating constants to avoid creating nodes with
6675 // illegal types.
6676 if (IsAfterLegalTypes && VT.isVector()) {
6677 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6678 if (SVT.bitsLT(VT.getScalarType()))
6679 return SDValue();
6680 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6681 if (ShSVT.bitsLT(ShVT.getScalarType()))
6682 return SDValue();
6683 }
6684 const unsigned SVTBits = SVT.getSizeInBits();
6685
6686 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6687
     // Per-constant callback: computes, for one divisor, the magic multiplier,
     // the numerator factor (-1, 0 or +1), the shift amount and the mask applied
     // to the extracted sign bit, and appends each to its vector.
6688 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6689 if (C->isZero())
6690 return false;
6691 // Truncate the divisor to the target scalar type in case it was promoted
6692 // during type legalization.
6693 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6695 int NumeratorFactor = 0;
6696 int ShiftMask = -1;
6697
6698 if (Divisor.isOne() || Divisor.isAllOnes()) {
6699 // If d is +1/-1, we just multiply the numerator by +1/-1.
6700 NumeratorFactor = Divisor.getSExtValue();
6701 magics.Magic = 0;
6702 magics.ShiftAmount = 0;
6703 ShiftMask = 0;
6704 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6705 // If d > 0 and m < 0, add the numerator.
6706 NumeratorFactor = 1;
6707 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6708 // If d < 0 and m > 0, subtract the numerator.
6709 NumeratorFactor = -1;
6710 }
6711
6712 MagicFactors.push_back(
6713 DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
6714 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6715 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6716 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6717 return true;
6718 };
6719
6720 SDValue N0 = N->getOperand(0);
6721 SDValue N1 = N->getOperand(1);
6722
6723 // Collect the shifts / magic values from each element.
6724 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6725 /*AllowTruncation=*/true))
6726 return SDValue();
6727
     // Rebuild the collected values in the same form as the divisor operand:
     // build vector, splat vector, or plain scalar constant.
6728 SDValue MagicFactor, Factor, Shift, ShiftMask;
6729 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6730 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6731 Factor = DAG.getBuildVector(VT, dl, Factors);
6732 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6733 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6734 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6735 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6736 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6737 "Expected matchUnaryPredicate to return one element for scalable "
6738 "vectors");
6739 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6740 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6741 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6742 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6743 } else {
6744 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6745 MagicFactor = MagicFactors[0];
6746 Factor = Factors[0];
6747 Shift = Shifts[0];
6748 ShiftMask = ShiftMasks[0];
6749 }
6750
6751 // Multiply the numerator (operand 0) by the magic value.
     // Produces the high half of the signed product using, in order of
     // preference: MULHS, the high result of SMUL_LOHI, or a multiply in the
     // wider MulVT followed by a shift right by EltBits and a truncate.
6752 auto GetMULHS = [&](SDValue X, SDValue Y) {
6753 if (HasMULHS)
6754 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6755 if (HasSMUL_LOHI) {
6756 SDValue LoHi =
6757 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6758 return LoHi.getValue(1);
6759 }
6760
6761 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6762 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6763 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6764 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6765 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6766 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6767 };
6768
6769 SDValue Q = GetMULHS(N0, MagicFactor);
6770 if (!Q)
6771 return SDValue();
6772
6773 Created.push_back(Q.getNode());
6774
6775 // (Optionally) Add/subtract the numerator using Factor.
     // Factor is -1, 0 or +1 per element, so the multiply yields -N0, 0 or N0.
6776 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6777 Created.push_back(Factor.getNode());
6778 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6779 Created.push_back(Q.getNode());
6780
6781 // Shift right algebraic by shift value.
6782 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6783 Created.push_back(Q.getNode());
6784
6785 // Extract the sign bit, mask it and add it to the quotient.
     // ShiftMask is 0 for divisors of +1/-1 and all-ones (-1) otherwise, so the
     // sign-bit correction is suppressed for +1/-1.
6786 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6787 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6788 Created.push_back(T.getNode());
6789 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6790 Created.push_back(T.getNode());
6791 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6792}
6793
6794/// Given an ISD::UDIV node expressing a divide by constant,
6795/// return a DAG expression to select that will generate the same value by
6796/// multiplying by a magic number.
6797/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
     ///
     /// Returns an empty SDValue when the transform is not applied. Intermediate
     /// nodes built along the way are appended to \p Created; the final node is
     /// returned without being appended.
6799 bool IsAfterLegalization,
6800 bool IsAfterLegalTypes,
6801 SmallVectorImpl<SDNode *> &Created) const {
6802 SDLoc dl(N);
6803
6804 // If the udiv has an 'exact' bit we can use a simpler lowering.
6805 if (N->getFlags().hasExact())
6806 return BuildExactUDIV(*this, N, dl, DAG, Created);
6807
6808 EVT VT = N->getValueType(0);
6809 EVT SVT = VT.getScalarType();
6810 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6811 EVT ShSVT = ShVT.getScalarType();
6812 unsigned EltBits = VT.getScalarSizeInBits();
     // MulVT stays a default-constructed EVT unless a wider multiply type is
     // chosen below; the code tests this with `MulVT == EVT()`.
6813 EVT MulVT;
6814
6815 // Check to see if we can do this.
6816 // FIXME: We should be more aggressive here.
6817 EVT QueryVT = VT;
6818 if (VT.isVector()) {
6819 // If the vector type will be legalized to a vector type with the same
6820 // element type, allow the transform before type legalization if MULHU or
6821 // UMUL_LOHI are supported.
6822 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6823 if (!QueryVT.isVector() ||
6825 return SDValue();
6826 } else if (!isTypeLegal(VT)) {
6827 // Limit this to simple scalars for now.
6828 if (!VT.isSimple())
6829 return SDValue();
6830
6831 // If this type will be promoted to a large enough type with a legal
6832 // multiply operation, we can go ahead and do this transform.
6834 return SDValue();
6835
6836 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6837 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6838 !isOperationLegal(ISD::MUL, MulVT))
6839 return SDValue();
6840 }
6841
6842 bool HasMULHU =
6843 isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
6844 bool HasUMUL_LOHI =
6845 isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);
6846
6847 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6848 // If the type twice as wide is legal, widen and use a mul plus a shift.
6849 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6850 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6851 // custom lowered. This is very expensive so avoid it at all costs for
6852 // constant divisors.
6853 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6856 MulVT = WideVT;
6857 }
6858
     // No high-multiply operation and no usable wider multiply type: give up.
6859 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6860 return SDValue();
6861
6862 SDValue N0 = N->getOperand(0);
6863 SDValue N1 = N->getOperand(1);
6864
6865 // Try to use leading zeros of the dividend to reduce the multiplier and
6866 // avoid expensive fixups.
6867 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6868
6869 // If we're after type legalization and SVT is not legal, use the
6870 // promoted type for creating constants to avoid creating nodes with
6871 // illegal types.
6872 if (IsAfterLegalTypes && VT.isVector()) {
6873 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6874 if (SVT.bitsLT(VT.getScalarType()))
6875 return SDValue();
6876 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6877 if (ShSVT.bitsLT(ShVT.getScalarType()))
6878 return SDValue();
6879 }
6880 const unsigned SVTBits = SVT.getSizeInBits();
6881
6882 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6883 // UMUL_LOHI is supported.
6884 const EVT WideSVT = MVT::i64;
6885 const bool HasWideMULHU =
6886 VT == MVT::i32 &&
6887 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6888 const bool HasWideUMUL_LOHI =
6889 VT == MVT::i32 &&
6890 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6891 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6892
     // These flags accumulate over all divisor elements: a step is emitted if
     // at least one element needs it.
6893 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6894 bool UseWiden = false;
6895 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6896
     // Per-constant callback: computes, for one divisor, the pre-shift, magic
     // multiplier, NPQ factor and post-shift, and appends each to its vector.
6897 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6898 if (C->isZero())
6899 return false;
6900 // Truncate the divisor to the target scalar type in case it was promoted
6901 // during type legalization.
6902 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6903
6904 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6905
6906 // Magic algorithm doesn't work for division by 1. We need to emit a select
6907 // at the end.
6908 if (Divisor.isOne()) {
6909 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6910 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6911 } else {
6914 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6915 /*AllowEvenDivisorOptimization=*/true,
6916 /*AllowWidenOptimization=*/AllowWiden);
6917
6918 if (magics.Widen) {
6919 UseWiden = true;
6920 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6921 } else {
6922 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6923 }
6924
6925 assert(magics.PreShift < Divisor.getBitWidth() &&
6926 "We shouldn't generate an undefined shift!");
6927 assert(magics.PostShift < Divisor.getBitWidth() &&
6928 "We shouldn't generate an undefined shift!");
6929 assert((!magics.IsAdd || magics.PreShift == 0) &&
6930 "Unexpected pre-shift");
6931 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6932 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
     // The NPQ factor has only bit (EltBits - 1) set when the add fixup is
     // needed for this element, and is zero otherwise.
6933 NPQFactor = DAG.getConstant(
6934 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6935 : APInt::getZero(SVTBits),
6936 dl, SVT);
6937 UseNPQ |= magics.IsAdd;
6938 UsePreShift |= magics.PreShift != 0;
6939 UsePostShift |= magics.PostShift != 0;
6940 }
6941
6942 PreShifts.push_back(PreShift);
6943 MagicFactors.push_back(MagicFactor);
6944 NPQFactors.push_back(NPQFactor);
6945 PostShifts.push_back(PostShift);
6946 return true;
6947 };
6948
6949 // Collect the shifts/magic values from each element.
6950 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6951 /*AllowTruncation=*/true))
6952 return SDValue();
6953
     // Rebuild the collected values in the same form as the divisor operand:
     // build vector, splat vector, or plain scalar constant.
6954 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6955 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6956 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6957 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6958 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6959 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6960 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6961 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6962 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6963 "Expected matchUnaryPredicate to return one for scalable vectors");
6964 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6965 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6966 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6967 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6968 } else {
6969 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
     // NPQFactor is intentionally left unset here: the scalar NPQ path below
     // uses a shift right by one instead of a multiply by NPQFactor.
6970 PreShift = PreShifts[0];
6971 MagicFactor = MagicFactors[0];
6972 PostShift = PostShifts[0];
6973 }
6974
6975 if (UseWiden) {
6976 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6977 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
6978
6979 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6980 // WideSVT bits
6981 SDValue High;
6982 if (HasWideMULHU) {
6983 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
6984 } else {
6985 assert(HasWideUMUL_LOHI);
6986 SDValue LoHi =
6987 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
6988 WideN0, MagicFactor);
6989 High = LoHi.getValue(1);
6990 }
6991
6992 Created.push_back(High.getNode());
6993 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
6994 }
6995
6996 SDValue Q = N0;
6997 if (UsePreShift) {
6998 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6999 Created.push_back(Q.getNode());
7000 }
7001
     // Produces the high half of the unsigned product using, in order of
     // preference: MULHU, the high result of UMUL_LOHI, or a multiply in the
     // wider MulVT followed by a shift right by EltBits and a truncate.
7002 auto GetMULHU = [&](SDValue X, SDValue Y) {
7003 if (HasMULHU)
7004 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
7005 if (HasUMUL_LOHI) {
7006 SDValue LoHi =
7007 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
7008 return LoHi.getValue(1);
7009 }
7010
7011 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
7012 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
7013 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
7014 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
7015 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
7016 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
7017 };
7018
7019 // Multiply the numerator (operand 0) by the magic value.
7020 Q = GetMULHU(Q, MagicFactor);
7021 if (!Q)
7022 return SDValue();
7023
7024 Created.push_back(Q.getNode());
7025
7026 if (UseNPQ) {
     // NPQ = N0 - Q, which is then halved and added back to Q.
7027 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
7028 Created.push_back(NPQ.getNode());
7029
7030 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7031 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7032 if (VT.isVector())
7033 NPQ = GetMULHU(NPQ, NPQFactor);
7034 else
7035 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
7036
7037 Created.push_back(NPQ.getNode());
7038
7039 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
7040 Created.push_back(Q.getNode());
7041 }
7042
7043 if (UsePostShift) {
7044 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7045 Created.push_back(Q.getNode());
7046 }
7047
7048 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7049
     // Elements whose divisor is 1 were given undef magic values above, so
     // select the unmodified numerator for them and the computed quotient
     // everywhere else.
7050 SDValue One = DAG.getConstant(1, dl, VT);
7051 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7052 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7053}
7054
7055/// If all values in Values that *don't* match the predicate are same 'splat'
7056/// value, then replace all values with that splat value.
7057/// Else, if AlternativeReplacement was provided, then replace all values that
7058/// do match predicate with AlternativeReplacement value.
     ///
     /// Values is modified in place. If neither a splat value nor an
     /// AlternativeReplacement is available, Values is left unchanged.
7059static void
7061 std::function<bool(SDValue)> Predicate,
7062 SDValue AlternativeReplacement = SDValue()) {
7063 SDValue Replacement;
7064 // Is there a value for which the Predicate does *NOT* match? What is it?
7065 auto SplatValue = llvm::find_if_not(Values, Predicate);
7066 if (SplatValue != Values.end()) {
7067 // Does Values consist only of SplatValue's and values matching Predicate?
7068 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7069 return Value == *SplatValue || Predicate(Value);
7070 })) // Then we shall replace values matching predicate with SplatValue.
7071 Replacement = *SplatValue;
7072 }
7073 if (!Replacement) {
7074 // Oops, we did not find the "baseline" splat value.
7075 if (!AlternativeReplacement)
7076 return; // Nothing to do.
7077 // Let's replace with provided value then.
7078 Replacement = AlternativeReplacement;
7079 }
     // Only the elements matching Predicate are overwritten.
7080 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7081}
7082
7083/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7084/// where the divisor and comparison target are constants,
7085/// return a DAG expression that will generate the same comparison result
7086/// using only multiplications, additions and shifts/rotations.
7087/// Ref: "Hacker's Delight" 10-17.
     ///
     /// This is a wrapper around prepareUREMEqFold: on success, every node
     /// recorded in Built is added to the combiner worklist before the folded
     /// value is returned. On failure an empty SDValue is returned.
7088SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7089 SDValue CompTargetNode,
7091 DAGCombinerInfo &DCI,
7092 const SDLoc &DL) const {
7094 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7095 DCI, DL, Built)) {
     // Queue the newly created nodes on the DAG combiner worklist.
7096 for (SDNode *N : Built)
7097 DCI.AddToWorklist(N);
7098 return Folded;
7099 }
7100
7101 return SDValue();
7102}
7103
7104SDValue
7105TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7106 SDValue CompTargetNode, ISD::CondCode Cond,
7107 DAGCombinerInfo &DCI, const SDLoc &DL,
7108 SmallVectorImpl<SDNode *> &Created) const {
7109 // fold (seteq/ne (urem N, D), C) ->
7110 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7111 // - D must be constant, with D = D0 * 2^K where D0 is odd
7112 // - P is the multiplicative inverse of D0 modulo 2^W
7113 // - Q = floor(((2^W) - 1) / D)
7114 // where W is the width of the common type of N and D.
7115 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7116 "Only applicable for (in)equality comparisons.");
7117
7118 SelectionDAG &DAG = DCI.DAG;
7119
7120 EVT VT = REMNode.getValueType();
7121 EVT SVT = VT.getScalarType();
7122 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7123 EVT ShSVT = ShVT.getScalarType();
7124
7125 // If MUL is unavailable, we cannot proceed in any case.
7126 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7127 return SDValue();
7128
7129 bool ComparingWithAllZeros = true;
7130 bool AllComparisonsWithNonZerosAreTautological = true;
7131 bool HadTautologicalLanes = false;
7132 bool AllLanesAreTautological = true;
7133 bool HadEvenDivisor = false;
7134 bool AllDivisorsArePowerOfTwo = true;
7135 bool HadTautologicalInvertedLanes = false;
7136 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7137
7138 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7139 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7140 if (CDiv->isZero())
7141 return false;
7142
7143 const APInt &D = CDiv->getAPIntValue();
7144 const APInt &Cmp = CCmp->getAPIntValue();
7145
7146 ComparingWithAllZeros &= Cmp.isZero();
7147
7148 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7149 // if C2 is not less than C1, the comparison is always false.
7150 // But we will only be able to produce the comparison that will give the
7151 // opposite tautological answer. So this lane would need to be fixed up.
7152 bool TautologicalInvertedLane = D.ule(Cmp);
7153 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7154
7155 // If all lanes are tautological (either all divisors are ones, or divisor
7156 // is not greater than the constant we are comparing with),
7157 // we will prefer to avoid the fold.
7158 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7159 HadTautologicalLanes |= TautologicalLane;
7160 AllLanesAreTautological &= TautologicalLane;
7161
7162 // If we are comparing with non-zero, we'll need to subtract said
7163 // comparison value from the LHS. But there is no point in doing that if
7164 // every lane where we are comparing with non-zero is tautological.
7165 if (!Cmp.isZero())
7166 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7167
7168 // Decompose D into D0 * 2^K
7169 unsigned K = D.countr_zero();
7170 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7171 APInt D0 = D.lshr(K);
7172
7173 // D is even if it has trailing zeros.
7174 HadEvenDivisor |= (K != 0);
7175 // D is a power-of-two if D0 is one.
7176 // If all divisors are power-of-two, we will prefer to avoid the fold.
7177 AllDivisorsArePowerOfTwo &= D0.isOne();
7178
7179 // P = inv(D0, 2^W)
7180 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7181 unsigned W = D.getBitWidth();
7182 APInt P = D0.multiplicativeInverse();
7183 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7184
7185 // Q = floor((2^W - 1) u/ D)
7186 // R = ((2^W - 1) u% D)
7187 APInt Q, R;
7189
7190 // If we are comparing with zero, then that comparison constant is okay,
7191 // else it may need to be one less than that.
7192 if (Cmp.ugt(R))
7193 Q -= 1;
7194
7196 "We are expecting that K is always less than all-ones for ShSVT");
7197
7198 // If the lane is tautological the result can be constant-folded.
7199 if (TautologicalLane) {
7200 // Set P and K amount to a bogus values so we can try to splat them.
7201 P = 0;
7202 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7203 // And ensure that comparison constant is tautological,
7204 // it will always compare true/false.
7205 Q.setAllBits();
7206 } else {
7207 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7208 }
7209
7210 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7211 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7212 return true;
7213 };
7214
7215 SDValue N = REMNode.getOperand(0);
7216 SDValue D = REMNode.getOperand(1);
7217
7218 // Collect the values from each element.
7219 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7220 return SDValue();
7221
7222 // If all lanes are tautological, the result can be constant-folded.
7223 if (AllLanesAreTautological)
7224 return SDValue();
7225
7226 // If this is a urem by a powers-of-two, avoid the fold since it can be
7227 // best implemented as a bit test.
7228 if (AllDivisorsArePowerOfTwo)
7229 return SDValue();
7230
7231 SDValue PVal, KVal, QVal;
7232 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7233 if (HadTautologicalLanes) {
7234 // Try to turn PAmts into a splat, since we don't care about the values
7235 // that are currently '0'. If we can't, just keep '0'`s.
7237 // Try to turn KAmts into a splat, since we don't care about the values
7238 // that are currently '-1'. If we can't, change them to '0'`s.
7240 DAG.getConstant(0, DL, ShSVT));
7241 }
7242
7243 PVal = DAG.getBuildVector(VT, DL, PAmts);
7244 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7245 QVal = DAG.getBuildVector(VT, DL, QAmts);
7246 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7247 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7248 "Expected matchBinaryPredicate to return one element for "
7249 "SPLAT_VECTORs");
7250 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7251 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7252 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7253 } else {
7254 PVal = PAmts[0];
7255 KVal = KAmts[0];
7256 QVal = QAmts[0];
7257 }
7258
7259 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7260 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7261 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7262 assert(CompTargetNode.getValueType() == N.getValueType() &&
7263 "Expecting that the types on LHS and RHS of comparisons match.");
7264 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7265 }
7266
7267 // (mul N, P)
7268 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7269 Created.push_back(Op0.getNode());
7270
7271 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7272 // divisors as a performance improvement, since rotating by 0 is a no-op.
7273 if (HadEvenDivisor) {
7274 // We need ROTR to do this.
7275 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7276 return SDValue();
7277 // UREM: (rotr (mul N, P), K)
7278 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7279 Created.push_back(Op0.getNode());
7280 }
7281
7282 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7283 SDValue NewCC =
7284 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7286 if (!HadTautologicalInvertedLanes)
7287 return NewCC;
7288
7289 // If any lanes previously compared always-false, the NewCC will give
7290 // always-true result for them, so we need to fixup those lanes.
7291 // Or the other way around for inequality predicate.
7292 assert(VT.isVector() && "Can/should only get here for vectors.");
7293 Created.push_back(NewCC.getNode());
7294
7295 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7296 // if C2 is not less than C1, the comparison is always false.
7297 // But we have produced the comparison that will give the
7298 // opposite tautological answer. So these lanes would need to be fixed up.
7299 SDValue TautologicalInvertedChannels =
7300 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7301 Created.push_back(TautologicalInvertedChannels.getNode());
7302
7303 // NOTE: we avoid letting illegal types through even if we're before legalize
7304 // ops – legalization has a hard time producing good code for this.
7305 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7306 // If we have a vector select, let's replace the comparison results in the
7307 // affected lanes with the correct tautological result.
7308 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7309 DL, SETCCVT, SETCCVT);
7310 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7311 Replacement, NewCC);
7312 }
7313
7314 // Else, we can just invert the comparison result in the appropriate lanes.
7315 //
7316 // NOTE: see the note above VSELECT above.
7317 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7318 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7319 TautologicalInvertedChannels);
7320
7321 return SDValue(); // Don't know how to lower.
7322}
7323
7324/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7325/// where the divisor is constant and the comparison target is zero,
7326/// return a DAG expression that will generate the same comparison result
7327/// using only multiplications, additions and shifts/rotations.
7328/// Ref: "Hacker's Delight" 10-17.
///
/// This is a thin driver around prepareSREMEqFold: when that call yields a
/// value, every node recorded in Built is queued on the combiner worklist and
/// the folded value is returned; otherwise an empty SDValue is returned.
///
/// NOTE(review): listing lines 7331 and 7334 are absent from this listing.
/// Presumably they carry the Cond parameter and the declaration of Built,
/// both of which the body uses - confirm against the upstream file.
7329SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7330 SDValue CompTargetNode,
7332 DAGCombinerInfo &DCI,
7333 const SDLoc &DL) const {
7335 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7336 DCI, DL, Built)) {
// The helper is expected to record at most 7 intermediate nodes.
7337 assert(Built.size() <= 7 && "Max size prediction failed.");
// Queue each recorded node on the DAG combiner worklist.
7338 for (SDNode *N : Built)
7339 DCI.AddToWorklist(N);
7340 return Folded;
7341 }
7342
// The helper declined the fold.
7343 return SDValue();
7344}
7345
/// Builds the replacement for (seteq/ne (srem N, D), 0) that is described in
/// the comment at the top of the body.
///
/// \param SETCCVT result type handed to the final getSetCC.
/// \param REMNode the remainder node; operand 0 is N and operand 1 is D.
/// \param CompTargetNode comparison RHS; the fold is abandoned unless this is
///        a zero constant or a splat of zero.
/// \param Cond must be ISD::SETEQ or ISD::SETNE (asserted).
/// \param DCI supplies the SelectionDAG and the legalization phase.
/// \param DL debug location used for every node created here.
/// \param Created receives the MUL, ADD and (when emitted) ROTR nodes.
/// \returns the new setcc, or an empty SDValue when the fold is rejected.
///
/// NOTE(review): gaps in the embedded listing numbers (7439, 7441, 7492,
/// 7495, 7499, 7549) mark lines that are absent from this listing.
7346SDValue
7347TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7348 SDValue CompTargetNode, ISD::CondCode Cond,
7349 DAGCombinerInfo &DCI, const SDLoc &DL,
7350 SmallVectorImpl<SDNode *> &Created) const {
7351 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7352 // Fold:
7353 // (seteq/ne (srem N, D), 0)
7354 // To:
7355 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7356 //
7357 // - D must be constant, with D = D0 * 2^K where D0 is odd
7358 // - P is the multiplicative inverse of D0 modulo 2^W
7359 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7360 // - Q = floor((2 * A) / (2^K))
7361 // where W is the width of the common type of N and D.
7362 //
7363 // When D is a power of two (and thus D0 is 1), the normal
7364 // formula for A and Q don't apply, because the derivation
7365 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7366 // does not apply. This specifically fails when N = INT_MIN.
7367 //
7368 // Instead, for power-of-two D, we use:
7369 // - A = 0
7370 // | -> No offset needed. We're effectively treating it the same as urem.
7371 // - Q = 2^(W-K) - 1
7372 // |-> Test that the top K bits are zero after rotation
7373 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7374 "Only applicable for (in)equality comparisons.");
7375
7376 SelectionDAG &DAG = DCI.DAG;
7377
// VT/SVT: type of the remainder and its scalar element type.
// ShVT/ShSVT: shift-amount type for VT and its scalar element type.
7378 EVT VT = REMNode.getValueType();
7379 EVT SVT = VT.getScalarType();
7380 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7381 EVT ShSVT = ShVT.getScalarType();
7382
7383 // If we are after ops legalization, and MUL is unavailable, we can not
7384 // proceed.
7385 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7386 return SDValue();
7387
7388 // TODO: Could support comparing with non-zero too.
7389 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7390 if (!CompTarget || !CompTarget->isZero())
7391 return SDValue();
7392
// Summary flags accumulated over all divisor elements by the lambda below,
// plus the per-element constants P, A, K and Q.
7393 bool HadOneDivisor = false;
7394 bool AllDivisorsAreOnes = true;
7395 bool HadEvenDivisor = false;
7396 bool AllDivisorsArePowerOfTwo = true;
7397 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7398
// Invoked once per divisor element; returning false rejects the whole fold.
7399 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7400 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7401 if (C->isZero())
7402 return false;
7403
7404 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7405
7406 // WARNING: this fold is only valid for positive divisors!
7407 // `rem %X, -C` is equivalent to `rem %X, C`
7408 APInt D = C->getAPIntValue().abs();
7409
7410 // If all divisors are ones, we will prefer to avoid the fold.
7411 HadOneDivisor |= D.isOne();
7412 AllDivisorsAreOnes &= D.isOne();
7413
7414 // Decompose D into D0 * 2^K
7415 unsigned K = D.countr_zero();
7416 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7417 APInt D0 = D.lshr(K);
7418
7419 // D is even if it has trailing zeros.
7420 HadEvenDivisor |= (K != 0);
7421
7422 // D is a power-of-two if D0 is one. This includes INT_MIN.
7423 // If all divisors are power-of-two, we will prefer to avoid the fold.
7424 AllDivisorsArePowerOfTwo &= D0.isOne();
7425
7426 // P = inv(D0, 2^W)
7427 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7428 unsigned W = D.getBitWidth();
7429 APInt P = D0.multiplicativeInverse();
7430 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7431
7432 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7433 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7434 A.clearLowBits(K);
7435
7436 // Q = floor((2 * A) / (2^K))
7437 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7438
// NOTE(review): listing lines 7439 and 7441 are absent here; the two string
// literals below are the message halves of asserts whose conditions are not
// visible in this listing.
7440 "We are expecting that A is always less than all-ones for SVT");
7442 "We are expecting that K is always less than all-ones for ShSVT");
7443
7444 // If D was a power of two, apply the alternate constant derivation.
7445 if (D0.isOne()) {
7446 // A = 0
7447 A = APInt(W, 0);
7448 // - Q = 2^(W-K) - 1
7449 Q = APInt::getLowBitsSet(W, W - K);
7450 }
7451
7452 // If the divisor is 1 the result can be constant-folded.
7453 if (D.isOne()) {
7454 // Set P, A and K to bogus values so we can try to splat them.
7455 P = 0;
7456 A.setAllBits();
7457 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7458
7459 // x ?% 1 == 0 <--> true <--> x u<= -1
7460 Q.setAllBits();
7461 } else {
7462 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7463 }
7464
7465 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7466 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7467 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7468 return true;
7469 };
7470
7471 SDValue N = REMNode.getOperand(0);
7472 SDValue D = REMNode.getOperand(1);
7473
7474 // Collect the values from each element.
7475 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7476 return SDValue();
7477
7478 // If this is a srem by one, avoid the fold since it can be constant-folded.
7479 if (AllDivisorsAreOnes)
7480 return SDValue();
7481
7482 // If this is a srem by powers-of-two (including INT_MIN), avoid the fold
7483 // since it can be best implemented as a bit test.
7484 if (AllDivisorsArePowerOfTwo)
7485 return SDValue();
7486
// Materialize P, A, K and Q in the same shape as the divisor operand:
// BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
7487 SDValue PVal, AVal, KVal, QVal;
7488 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7489 if (HadOneDivisor) {
// NOTE(review): the statements that perform the three splat attempts
// described below (listing lines 7492, 7495 and 7499) are absent from this
// listing; only trailing argument lines remain.
7490 // Try to turn PAmts into a splat, since we don't care about the values
7491 // that are currently '0'. If we can't, just keep '0'`s.
7493 // Try to turn AAmts into a splat, since we don't care about the
7494 // values that are currently '-1'. If we can't, change them to '0'`s.
7496 DAG.getConstant(0, DL, SVT));
7497 // Try to turn KAmts into a splat, since we don't care about the values
7498 // that are currently '-1'. If we can't, change them to '0'`s.
7500 DAG.getConstant(0, DL, ShSVT));
7501 }
7502
7503 PVal = DAG.getBuildVector(VT, DL, PAmts);
7504 AVal = DAG.getBuildVector(VT, DL, AAmts);
7505 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7506 QVal = DAG.getBuildVector(VT, DL, QAmts);
7507 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7508 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7509 QAmts.size() == 1 &&
7510 "Expected matchUnaryPredicate to return one element for scalable "
7511 "vectors");
7512 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7513 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7514 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7515 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7516 } else {
7517 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7518 PVal = PAmts[0];
7519 AVal = AAmts[0];
7520 KVal = KAmts[0];
7521 QVal = QAmts[0];
7522 }
7523
7524 // (mul N, P)
7525 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7526 Created.push_back(Op0.getNode());
7527
7528 // We need ADD to do this.
// Note that the MUL has already been recorded in Created at this point.
7529 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7530 return SDValue();
7531
7532 // (add (mul N, P), A)
7533 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7534 Created.push_back(Op0.getNode());
7535
7536 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7537 // divisors as a performance improvement, since rotating by 0 is a no-op.
7538 if (HadEvenDivisor) {
7539 // We need ROTR to do this.
7540 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7541 return SDValue();
7542 // SREM: (rotr (add (mul N, P), A), K)
7543 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7544 Created.push_back(Op0.getNode());
7545 }
7546
7547 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
// NOTE(review): listing line 7549, which carries the condition-code argument
// of this call, is absent from this listing.
7548 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7550}
7551
// Builds a setcc on Op whose form depends on the denormal input mode:
//  - Mode.Input is PreserveSign or PositiveZero: Op == 0.0
//  - otherwise: fabs(Op) < smallest normalized value of the type of Op
// The result type is the setcc result type for the type of Op, and Flags are
// forwarded to the nodes created here.
//
// NOTE(review): listing line 7552, which carries the function name and the
// leading parameters (the body uses Op and DAG), is absent from this listing.
// The name is presumably getSqrtInputTest - confirm against the upstream file.
7553 const DenormalMode &Mode,
7554 SDNodeFlags Flags) const {
7555 SDLoc DL(Op);
7556 EVT VT = Op.getValueType();
7557 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7558 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7559
7560 // This is specifically a check for the handling of denormal inputs, not the
7561 // result.
7562 if (Mode.Input == DenormalMode::PreserveSign ||
7563 Mode.Input == DenormalMode::PositiveZero) {
7564 // Test = X == 0.0
7565 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7566 /*Signaling=*/false, Flags);
7567 }
7568
7569 // Testing it with denormal inputs to avoid wrong estimate.
7570 //
7571 // Test = fabs(X) < SmallestNormal
// The threshold is taken from the floating-point semantics of VT.
7572 const fltSemantics &FltSem = VT.getFltSemantics();
7573 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7574 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7575 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7576 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7577 /*Signaling=*/false, Flags);
7578}
7579
// Tries to build an expression equal to the negation of Op without wrapping
// it in a new FNEG, dispatching on the opcode of Op. Returns an empty SDValue
// when no such expression is found. On the paths visible here, Cost is set
// to the cost of the alternative that was chosen.
//
// LegalOps restricts the rewrite to operations and constants the target
// accepts; OptForSize is forwarded to isFPImmLegal and to the recursive
// calls; Depth is incremented before recursing.
//
// NOTE(review): the name getNegatedExpression is taken from the recursive
// calls in the body. Gaps in the embedded listing numbers (for example 7580,
// 7582, 7586, 7591, 7625, 7674, 7697, 7705) mark lines that are absent from
// this listing, including the function name line, the recursion-depth test,
// and the declarations of CostX, CostY, CostZ, CostLHS, CostRHS and Ops that
// the body uses.
7581 bool LegalOps, bool OptForSize,
7583 unsigned Depth) const {
7584 // fneg is removable even if it has multiple uses.
7585 if (Op.getOpcode() == ISD::FNEG) {
7587 return Op.getOperand(0);
7588 }
7589
7590 // Don't recurse exponentially.
7592 return SDValue();
7593
7594 // Pre-increment recursion depth for use in recursive calls.
7595 ++Depth;
7596 const SDNodeFlags Flags = Op->getFlags();
7597 EVT VT = Op.getValueType();
7598 unsigned Opcode = Op.getOpcode();
7599
7600 // Don't allow anything with multiple uses unless we know it is free.
7601 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7602 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7603 isFPExtFree(VT, Op.getOperand(0).getValueType());
7604 if (!IsFreeExtend)
7605 return SDValue();
7606 }
7607
// Deletes N from the DAG only if it is non-null and has no remaining uses.
7608 auto RemoveDeadNode = [&](SDValue N) {
7609 if (N && N.getNode()->use_empty())
7610 DAG.RemoveDeadNode(N.getNode());
7611 };
7612
7613 SDLoc DL(Op);
7614
7615 // Because getNegatedExpression can delete nodes we need a handle to keep
7616 // temporary nodes alive in case the recursion manages to create an identical
7617 // node.
7618 std::list<HandleSDNode> Handles;
7619
7620 switch (Opcode) {
7621 case ISD::ConstantFP: {
7622 // Don't invert constant FP values after legalization unless the target says
7623 // the negated constant is legal.
7624 bool IsOpLegal =
7626 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7627 OptForSize);
7628
7629 if (LegalOps && !IsOpLegal)
7630 break;
7631
7632 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7633 V.changeSign();
7634 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7635
7636 // If we already have the use of the negated floating constant, it is free
7637 // to negate it even it has multiple uses.
7638 if (!Op.hasOneUse() && CFP.use_empty())
7639 break;
7641 return CFP;
7642 }
7643 case ISD::SPLAT_VECTOR: {
7644 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7645 SDValue X = Op.getOperand(0);
7647 break;
7648
7649 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7650 if (!NegX)
7651 break;
7653 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7654 }
7655 case ISD::BUILD_VECTOR: {
7656 // Only permit BUILD_VECTOR of constants.
7657 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7658 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7659 }))
7660 break;
7661
7662 bool IsOpLegal =
7665 llvm::all_of(Op->op_values(), [&](SDValue N) {
7666 return N.isUndef() ||
7667 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7668 OptForSize);
7669 });
7670
7671 if (LegalOps && !IsOpLegal)
7672 break;
7673
// Rebuild the vector element by element: undef elements are kept as they
// are, constant elements get their sign flipped.
7675 for (SDValue C : Op->op_values()) {
7676 if (C.isUndef()) {
7677 Ops.push_back(C);
7678 continue;
7679 }
7680 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7681 V.changeSign();
7682 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7683 }
7685 return DAG.getBuildVector(VT, DL, Ops);
7686 }
7687 case ISD::FADD: {
// This rewrite is only attempted when the no-signed-zeros flag is set.
7688 if (!Flags.hasNoSignedZeros())
7689 break;
7690
7691 // After operation legalization, it might not be legal to create new FSUBs.
7692 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7693 break;
7694 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7695
7696 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7698 SDValue NegX =
7699 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7700 // Prevent this node from being deleted by the next call.
7701 if (NegX)
7702 Handles.emplace_back(NegX);
7703
7704 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7706 SDValue NegY =
7707 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7708
7709 // We're done with the handles.
7710 Handles.clear();
7711
7712 // Negate the X if its cost is less or equal than Y.
// The candidate that is not used is removed if it ended up without uses.
7713 if (NegX && (CostX <= CostY)) {
7714 Cost = CostX;
7715 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7716 if (NegY != N)
7717 RemoveDeadNode(NegY);
7718 return N;
7719 }
7720
7721 // Negate the Y if it is not expensive.
7722 if (NegY) {
7723 Cost = CostY;
7724 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7725 if (NegX != N)
7726 RemoveDeadNode(NegX);
7727 return N;
7728 }
7729 break;
7730 }
7731 case ISD::FSUB: {
7732 // We can't turn -(A-B) into B-A when we honor signed zeros.
7733 if (!Flags.hasNoSignedZeros())
7734 break;
7735
7736 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7737 // fold (fneg (fsub 0, Y)) -> Y
7738 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7739 if (C->isZero()) {
7741 return Y;
7742 }
7743
7744 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7746 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7747 }
7748 case ISD::FMUL:
7749 case ISD::FDIV: {
7750 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7751
7752 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7754 SDValue NegX =
7755 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7756 // Prevent this node from being deleted by the next call.
7757 if (NegX)
7758 Handles.emplace_back(NegX);
7759
7760 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7762 SDValue NegY =
7763 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7764
7765 // We're done with the handles.
7766 Handles.clear();
7767
7768 // Negate the X if its cost is less or equal than Y.
7769 if (NegX && (CostX <= CostY)) {
7770 Cost = CostX;
7771 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7772 if (NegY != N)
7773 RemoveDeadNode(NegY);
7774 return N;
7775 }
7776
7777 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7778 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7779 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7780 break;
7781
7782 // Negate the Y if it is not expensive.
7783 if (NegY) {
7784 Cost = CostY;
7785 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7786 if (NegX != N)
7787 RemoveDeadNode(NegX);
7788 return N;
7789 }
7790 break;
7791 }
7792 case ISD::FMA:
7793 case ISD::FMULADD:
7794 case ISD::FMAD: {
7795 if (!Flags.hasNoSignedZeros())
7796 break;
7797
// The addend Z must be negatable; then one of the two multiplicands is
// negated as well.
7798 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7800 SDValue NegZ =
7801 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7802 // Give up if fail to negate the Z.
7803 if (!NegZ)
7804 break;
7805
7806 // Prevent this node from being deleted by the next two calls.
7807 Handles.emplace_back(NegZ);
7808
7809 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7811 SDValue NegX =
7812 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7813 // Prevent this node from being deleted by the next call.
7814 if (NegX)
7815 Handles.emplace_back(NegX);
7816
7817 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7819 SDValue NegY =
7820 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7821
7822 // We're done with the handles.
7823 Handles.clear();
7824
7825 // Negate the X if its cost is less or equal than Y.
7826 if (NegX && (CostX <= CostY)) {
7827 Cost = std::min(CostX, CostZ);
7828 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7829 if (NegY != N)
7830 RemoveDeadNode(NegY);
7831 return N;
7832 }
7833
7834 // Negate the Y if it is not expensive.
7835 if (NegY) {
7836 Cost = std::min(CostY, CostZ);
7837 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7838 if (NegX != N)
7839 RemoveDeadNode(NegX);
7840 return N;
7841 }
7842 break;
7843 }
7844
// Single-operand nodes: negate the operand and rebuild the same node on top.
7845 case ISD::FP_EXTEND:
7846 case ISD::FSIN:
7847 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7848 OptForSize, Cost, Depth))
7849 return DAG.getNode(Opcode, DL, VT, NegV);
7850 break;
7851 case ISD::FP_ROUND:
// Same idea, but the second operand of FP_ROUND is passed through unchanged.
7852 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7853 OptForSize, Cost, Depth))
7854 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7855 break;
7856 case ISD::SELECT:
7857 case ISD::VSELECT: {
7858 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7859 // iff at least one cost is cheaper and the other is neutral/cheaper
7860 SDValue LHS = Op.getOperand(1);
7862 SDValue NegLHS =
7863 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7864 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7865 RemoveDeadNode(NegLHS);
7866 break;
7867 }
7868
7869 // Prevent this node from being deleted by the next call.
7870 Handles.emplace_back(NegLHS);
7871
7872 SDValue RHS = Op.getOperand(2);
7874 SDValue NegRHS =
7875 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7876
7877 // We're done with the handles.
7878 Handles.clear();
7879
7880 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7881 (CostLHS != NegatibleCost::Cheaper &&
7882 CostRHS != NegatibleCost::Cheaper)) {
7883 RemoveDeadNode(NegLHS);
7884 RemoveDeadNode(NegRHS);
7885 break;
7886 }
7887
7888 Cost = std::min(CostLHS, CostRHS);
7889 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7890 }
7891 }
7892
// No case produced a negated form.
7893 return SDValue();
7894}
7895
7896//===----------------------------------------------------------------------===//
7897// Legalization Utilities
7898//===----------------------------------------------------------------------===//
7899
/// Expands a MUL, UMUL_LOHI or SMUL_LOHI of type VT into operations on the
/// half-width type HiLoVT.
///
/// \param Opcode one of ISD::MUL, ISD::UMUL_LOHI, ISD::SMUL_LOHI (asserted).
/// \param LHS, RHS the full-width operands.
/// \param Kind when MulExpansionKind::Always, every half-width multiply
///        flavour is treated as available.
/// \param LL, LH, RL, RH optional pre-split low/high halves of LHS and RHS;
///        they must be either all set or all null (asserted).
/// \returns true on success. Result then holds the pieces in HiLoVT, lowest
///          part first: two entries for ISD::MUL, four for the LOHI opcodes.
///          Returns false when a required half-width operation or operand
///          half is not available.
///
/// NOTE(review): gaps in the embedded listing numbers (7902, 7910, 7912,
/// 7914, 7916, 7947, 7987, 7988, 8035) mark lines that are absent from this
/// listing, including the declaration of the Result parameter and the second
/// halves of several conditions.
7900bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7901 SDValue LHS, SDValue RHS,
7903 EVT HiLoVT, SelectionDAG &DAG,
7904 MulExpansionKind Kind, SDValue LL,
7905 SDValue LH, SDValue RL, SDValue RH) const {
7906 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7907 Opcode == ISD::SMUL_LOHI);
7908
// Which half-width multiply flavours may be used. The right-hand operands of
// these four disjunctions are on listing lines that are absent here.
7909 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7911 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7913 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7915 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7917
7918 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7919 return false;
7920
7921 unsigned OuterBitSize = VT.getScalarSizeInBits();
7922 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7923
7924 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7925 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7926 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7927
// Produces the low and high halves of L * R in HiLoVT. A single *_LOHI node
// is preferred; otherwise a MUL plus a MULHS/MULHU pair is used. Returns
// false when neither form is available for the requested signedness.
7928 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7929 bool Signed) -> bool {
7930 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7931 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7932 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7933 Hi = Lo.getValue(1);
7934 return true;
7935 }
7936 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7937 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7938 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7939 return true;
7940 }
7941 return false;
7942 };
7943
7944 SDValue Lo, Hi;
7945
// Derive the low halves by truncation when the caller did not supply them.
7946 if (!LL.getNode() && !RL.getNode() &&
7948 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7949 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7950 }
7951
7952 if (!LL.getNode())
7953 return false;
7954
7955 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7956 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7957 DAG.MaskedValueIsZero(RHS, HighMask)) {
7958 // The inputs are both zero-extended.
// One unsigned half-width multiply suffices; for the LOHI opcodes the two
// upper result pieces are zero.
7959 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7960 Result.push_back(Lo);
7961 Result.push_back(Hi);
7962 if (Opcode != ISD::MUL) {
7963 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7964 Result.push_back(Zero);
7965 Result.push_back(Zero);
7966 }
7967 return true;
7968 }
7969 }
7970
7971 if (!VT.isVector() && Opcode == ISD::MUL &&
7972 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7973 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7974 // The input values are both sign-extended.
7975 // TODO non-MUL case?
7976 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7977 Result.push_back(Lo);
7978 Result.push_back(Hi);
7979 return true;
7980 }
7981 }
7982
7983 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7984 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7985
// Derive the high halves with a logical shift right followed by a truncate
// when the caller did not supply them.
7986 if (!LH.getNode() && !RH.getNode() &&
7989 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7990 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7991 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7992 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7993 }
7994
7995 if (!LH.getNode())
7996 return false;
7997
// General case: start with the unsigned product of the two low halves.
7998 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7999 return false;
8000
8001 Result.push_back(Lo);
8002
// For a plain MUL only the low OuterBitSize bits are needed, so the high
// piece is Hi + LL * RH + LH * RL computed in HiLoVT.
8003 if (Opcode == ISD::MUL) {
8004 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
8005 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
8006 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8007 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8008 Result.push_back(Hi);
8009 return true;
8010 }
8011
8012 // Compute the full width result.
// Merge zero-extends both halves to VT and combines them as
// Lo | (Hi << Shift).
8013 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8014 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8015 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8016 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8017 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8018 };
8019
8020 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8021 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8022 return false;
8023
8024 // This is effectively the add part of a multiply-add of half-sized operands,
8025 // so it cannot overflow.
8026 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8027
8028 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8029 return false;
8030
8031 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8032 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8033
// Add the second cross product and capture the carry-out, either through the
// glue-based ADDC/ADDE pair or through UADDO_CARRY.
8034 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8036 if (UseGlue)
8037 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8038 Merge(Lo, Hi));
8039 else
8040 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8041 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8042
8043 SDValue Carry = Next.getValue(1);
8044 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8045 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8046
// The product of the two high halves is signed only for SMUL_LOHI.
8047 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8048 return false;
8049
// Fold the carry from the previous addition into the high half.
8050 if (UseGlue)
8051 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8052 Carry);
8053 else
8054 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8055 Zero, Carry);
8056
8057 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8058
// Signed correction: subtract zext(RL) when LH is negative, and subtract
// zext(LL) when RH is negative.
8059 if (Opcode == ISD::SMUL_LOHI) {
8060 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8061 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8062 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8063
8064 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8065 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8066 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8067 }
8068
// Emit the two upper result pieces from Next.
8069 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8070 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8071 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8072 return true;
8073}
8074
// Convenience wrapper around expandMUL_LOHI for a node N: forwards the opcode,
// result type, location and the two operands of N, and on success copies the
// two resulting pieces into Lo and Hi. Returns whatever expandMUL_LOHI
// returned.
//
// NOTE(review): listing lines 8075 and 8079 are absent from this listing.
// They presumably carry the function name (likely expandMUL) with the N, Lo,
// Hi and HiLoVT parameters, and the declaration of Result - confirm against
// the upstream file.
8076 SelectionDAG &DAG, MulExpansionKind Kind,
8077 SDValue LL, SDValue LH, SDValue RL,
8078 SDValue RH) const {
8080 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8081 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8082 DAG, Kind, LL, LH, RL, RH);
8083 if (Ok) {
// Exactly two pieces (low, high) are expected on this path.
8084 assert(Result.size() == 2);
8085 Lo = Result[0];
8086 Hi = Result[1];
8087 }
8088 return Ok;
8089}
8090
8091// Optimize unsigned division or remainder by constants for types twice as large
8092// as a legal VT.
8093//
8094// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8095// can be computed
8096// as:
8097// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8098// Remainder = Sum % Constant;
8099//
8100// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8101// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8102// High:Low into 3 chunks of W bits and compute remainder as
8103// Sum = Chunk0 + Chunk1 + Chunk2;
8104// Remainder = Sum % Constant;
8105//
8106// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8107//
8108// For division, we can compute the remainder using the algorithm described
8109// above, subtract it from the dividend to get an exact multiple of Constant.
8110 // Then multiply that exact multiple by the multiplicative inverse modulo
8111 // (1 << BitWidth) to get the quotient.
8112
8113// If Constant is even, we can shift right the dividend and the divisor by the
8114// number of trailing zeros in Constant before applying the remainder algorithm.
8115// If we're after the quotient, we can subtract this value from the shifted
8116// dividend and multiply by the multiplicative inverse of the shifted divisor.
8117// If we want the remainder, we shift the value left by the number of trailing
8118// zeros and add the bits that were shifted out of the dividend.
8119bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8120 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8121 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
// N is the node being expanded; only its opcode, result type and operand 0
// are read here, and the opcode is compared against ISD::UDIV / ISD::UREM to
// decide which results to produce. Divisor is taken by value because its
// trailing zero bits are stripped below. LL/LH optionally supply the low/high
// halves of the dividend; when both are null the dividend is split here.
// On success the quotient halves (unless the opcode is UREM) followed by the
// remainder halves (unless the opcode is UDIV) are appended to Result, low
// half first. Returns false, before anything is appended to Result, when the
// odd part of the divisor does not fit in HBitWidth bits or when no usable
// chunk width is found.
8122 unsigned Opcode = N->getOpcode();
8123 EVT VT = N->getValueType(0);
8124
8125 unsigned BitWidth = Divisor.getBitWidth();
8126 unsigned HBitWidth = BitWidth / 2;
8128 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8129
8130 // If the divisor is even, shift it until it becomes odd.
8131 unsigned TrailingZeros = 0;
8132 if (!Divisor[0]) {
8133 TrailingZeros = Divisor.countr_zero();
8134 Divisor.lshrInPlace(TrailingZeros);
8135 }
8136
8137 // After removing trailing zeros, the divisor needs to be less than
8138 // (1 << HBitWidth).
8139 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8140 if (Divisor.uge(HalfMaxPlus1))
8141 return false;
8142
8143 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8144 // (1 << W) % Divisor == -1.
// Candidates run from HBitWidth down to HBitWidth / 2 + 1. The first width
// whose remainder is 1 ends the search; a width whose remainder is
// Divisor - 1 is only remembered as a fallback.
8145 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8146 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8147 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8148 if (I == HBitWidth - 1)
8149 continue;
8150
8151 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8152
8153 if (Mod.isOne()) {
8154 BestChunkWidth = I;
8155 break;
8156 }
8157
8158 // We have an alternate strategy for Remainder == Divisor - 1.
8159 // FIXME: Support HBitWidth.
8160 if (I != HBitWidth && Mod == Divisor - 1)
8161 AltChunkWidth = I;
8162 }
8163
// Fall back to the alternating-sign strategy only when no width with
// remainder 1 was found; give up if neither kind of width exists.
8164 bool Alternate = false;
8165 if (!BestChunkWidth) {
8166 if (!AltChunkWidth)
8167 return false;
8168 Alternate = true;
8169 BestChunkWidth = AltChunkWidth;
8170 }
8171
8172 SDLoc dl(N);
8173
8174 assert(!LL == !LH && "Expected both input halves or no input halves!");
8175 if (!LL)
8176 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8177
8178 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8179
// Returns the low HBitWidth bits of (Hi:Lo >> ShiftAmt) for
// 0 < ShiftAmt < HBitWidth, using FSHR when it is legal for HiLoVT and an
// explicit SRL/SHL/OR sequence otherwise.
8180 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8181 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8182 if (HasFSHR)
8183 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8184 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8185 return DAG.getNode(
8186 ISD::OR, dl, HiLoVT,
8187 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8188 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8189 DAG.getNode(
8190 ISD::SHL, dl, HiLoVT, Hi,
8191 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8192 };
8193
8194 // Helper to perform a right shift on a double-width (2 * HBitWidth bits)
8195 // value split into two halves; Lo and Hi are updated in place.
// Handles shifts >= HBitWidth by moving Hi to Lo and zeroing Hi.
8196 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8197 if (ShiftAmt == 0)
8198 return;
8199 if (ShiftAmt < HBitWidth) {
8200 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8201 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8202 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8203 } else if (ShiftAmt == HBitWidth) {
8204 Lo = Hi;
8205 Hi = DAG.getConstant(0, dl, HiLoVT);
8206 } else {
8207 Lo = DAG.getNode(
8208 ISD::SRL, dl, HiLoVT, Hi,
8209 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8210 Hi = DAG.getConstant(0, dl, HiLoVT);
8211 }
8212 };
8213
8214 // Shift the input by the number of TrailingZeros in the divisor. The
8215 // shifted out bits will be added to the remainder later.
8216 SDValue PartialRemL, PartialRemH;
8217 if (TrailingZeros && Opcode != ISD::UDIV) {
8218 // Save the shifted off bits if we need the remainder.
8219 if (TrailingZeros < HBitWidth) {
8220 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8221 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8222 DAG.getConstant(Mask, dl, HiLoVT));
8223 } else if (TrailingZeros == HBitWidth) {
8224 // All of LL is part of the remainder.
8225 PartialRemL = LL;
8226 } else {
8227 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8228 PartialRemL = LL;
8229 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8230 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8231 DAG.getConstant(Mask, dl, HiLoVT));
8232 }
8233 }
8234
8235 SDValue Sum;
8236 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8237 // out, add that to the final sum.
8238 if (BestChunkWidth == HBitWidth) {
8239 assert(!Alternate);
8240 // Shift LH:LL right if there were trailing zeros in the divisor.
8241 ShiftRight(LL, LH, TrailingZeros);
8242
8243 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8244 EVT SetCCType =
8245 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8247 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8248 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8249 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8250 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8251 } else {
8252 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
// An unsigned wrap happened exactly when the sum is smaller than an addend.
8253 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8254 // If the boolean for the target is 0 or 1, we can add the setcc result
8255 // directly.
8256 if (getBooleanContents(HiLoVT) ==
8258 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8259 else
8260 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8261 DAG.getConstant(0, dl, HiLoVT));
8262 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8263 }
8264 } else {
8265 // Otherwise split into multiple chunks and add them together. We chose
8266 // BestChunkWidth so that the sum will not overflow.
8267 SDValue Mask = DAG.getConstant(
8268 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8269
8270 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8271 // If there were trailing zeros in the divisor, increase the shift amount.
8272 unsigned Shift = I + TrailingZeros;
8273 SDValue Chunk;
8274 if (Shift == 0)
8275 Chunk = LL;
8276 else if (Shift >= HBitWidth)
8277 Chunk = DAG.getNode(
8278 ISD::SRL, dl, HiLoVT, LH,
8279 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8280 else
8281 Chunk = GetFSHR(LL, LH, Shift);
8282 // If we're on the last chunk, we don't need an AND.
8283 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8284 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8285 if (!Sum) {
8286 Sum = Chunk;
8287 } else {
8288 // For Alternate, we need to subtract odd chunks.
8289 unsigned ChunkNum = I / BestChunkWidth;
8290 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8291 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8292 }
8293 }
8294
8295 // For Alternate, the sum may be negative, but we need a positive sum. We
8296 // can increase it by a multiple of the divisor to make it positive. For 3
8297 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8298 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8299 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8300 // positive.
8301 if (Alternate) {
8302 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8303 assert(NumChunks <= 4);
8304
// Adjust = 2^BestChunkWidth + 1 (doubled below for the 4-chunk case).
8305 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8306 Adjust.setBit(0);
8307 // If there are 4 chunks, we need to adjust twice.
8308 if (NumChunks == 4)
8309 Adjust <<= 1;
8310 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8311 DAG.getConstant(Adjust, dl, HiLoVT));
8312 }
8313 }
8314
8315 // Perform a HiLoVT urem on the Sum using truncated divisor.
8316 SDValue RemL =
8317 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8318 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
// The (odd) divisor was checked to be below (1 << HBitWidth), so this
// remainder fits in the low half and the high half starts out as zero.
8319 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8320
8321 if (Opcode != ISD::UREM) {
8322 // If we didn't shift LL/LH earlier, do it now.
8323 if (BestChunkWidth != HBitWidth)
8324 ShiftRight(LL, LH, TrailingZeros);
8325
8326 // Subtract the remainder from the shifted dividend.
8327 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8328 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8329
8330 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8331
8332 // Multiply by the multiplicative inverse of the divisor modulo
8333 // (1 << BitWidth).
8334 APInt MulFactor = Divisor.multiplicativeInverse();
8335
8336 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8337 DAG.getConstant(MulFactor, dl, VT));
8338
8339 // Split the quotient into low and high parts.
8340 SDValue QuotL, QuotH;
8341 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8342 Result.push_back(QuotL);
8343 Result.push_back(QuotH);
8344 }
8345
8346 if (Opcode != ISD::UDIV) {
8347 // If we shifted the input, shift the remainder left and add the bits we
8348 // shifted off the input.
8349 if (TrailingZeros) {
8350 if (TrailingZeros < HBitWidth) {
8351 // Shift RemH:RemL left by TrailingZeros.
8352 // RemH gets the high bits shifted out of RemL.
8353 RemH = DAG.getNode(
8354 ISD::SRL, dl, HiLoVT, RemL,
8355 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8356 RemL =
8357 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8358 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8359 // OR in the partial remainder.
8360 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8362 } else if (TrailingZeros == HBitWidth) {
8363 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8364 // PartialRemL.
8365 RemH = RemL;
8366 RemL = PartialRemL;
8367 } else {
8368 // Shift left by more than HBitWidth.
8369 RemH = DAG.getNode(
8370 ISD::SHL, dl, HiLoVT, RemL,
8371 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8372 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8374 RemL = PartialRemL;
8375 }
8376 }
8377 Result.push_back(RemL);
8378 Result.push_back(RemH);
8379 }
8380
8381 return true;
8382}
8383
// Expand an unsigned division/remainder of a double-width value by the
// constant Divisor using a "magic number" multiply: the quotient is derived
// from the upper part of a widening multiply by Magics.Magic, with optional
// pre/post shifts and an add fix-up. LL/LH optionally supply the low/high
// halves of the dividend; when both are null, operand 0 of N is split here.
// Results are appended to Result as (quotient lo, quotient hi) unless the
// opcode is UREM, followed by (remainder lo, remainder hi) unless the opcode
// is UDIV. Returns false if one of the multiplies cannot be expanded.
8384bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8385 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8386 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8387
8388 SDValue N0 = N->getOperand(0);
8389 EVT VT = N0->getValueType(0);
8390 SDLoc DL{N};
8391
8392 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8393
8394 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
8395 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8396 const APInt &Const,
8397 SmallVectorImpl<SDValue> &Result) {
8398 SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8399 SDValue RHS = DAG.getConstant(Const, DL, VT);
8400 auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8401 return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8403 LL, LH, RL, RH);
8404 };
8405
8406 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
// The low halves are combined first; the overflow bit of that node is fed
// into the node that combines the high halves. Returns {low, high}.
8407 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8408 SDValue RH) {
8409 SDValue AddSubNode =
8411 DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8412 SDValue OutL = AddSubNode.getValue(0);
8413 SDValue Overflow = AddSubNode.getValue(1);
8414 SDValue AddSubWithOverflow =
8416 DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8417 SDValue OutH = AddSubWithOverflow.getValue(0);
8418 return std::make_pair(OutL, OutH);
8419 };
8420
8421 // This helper creates a SRL of the pair (LL, LH) by Shift.
// Shifts below the half width use FSHR for the low half; a shift of exactly
// the half width moves LH into the low half; larger shifts shift LH by the
// excess. In the latter two cases the high half becomes zero.
8422 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8423 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8424 if (Shift < HBitWidth) {
8425 SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8426 SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8427 SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8428 return std::make_pair(ResL, ResH);
8429 }
8430 SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8431 if (Shift == HBitWidth)
8432 return std::make_pair(LH, Zero);
8433 assert(Shift - HBitWidth < HBitWidth &&
8434 "We shouldn't generate an undefined shift");
8435 SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8436 return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8437 };
8438
8439 // Knowledge of leading zeros may help to reduce the multiplier.
8440 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8441
8442 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8443 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8444
8445 assert(!LL == !LH && "Expected both input halves or no input halves!");
8446 if (!LL)
8447 std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
// (QL, QH) tracks the quotient-in-progress; (LL, LH) stays the original
// dividend, which is still needed for the add fix-up and the remainder.
8448 SDValue QL = LL;
8449 SDValue QH = LH;
8450 if (Magics.PreShift != 0)
8451 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8452
8453 SmallVector<SDValue, 4> UMulResult;
8454 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8455 return false;
8456
// Keep only entries 2 and 3 of the expansion result -- presumably the upper
// two of four HiLoVT-sized pieces of the product, i.e. its high half.
8457 QL = UMulResult[2];
8458 QH = UMulResult[3];
8459
// Add fix-up: Q = ((N - Q) >> 1) + Q.
8460 if (Magics.IsAdd) {
8461 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8462 std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8463 std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8464 }
8465
8466 if (Magics.PostShift != 0)
8467 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8468
8469 unsigned Opcode = N->getOpcode();
8470 if (Opcode != ISD::UREM) {
8471 Result.push_back(QL);
8472 Result.push_back(QH);
8473 }
8474
// Remainder = N - Q * Divisor, computed on the split halves.
8475 if (Opcode != ISD::UDIV) {
8476 SmallVector<SDValue, 2> MulResult;
8477 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8478 return false;
8479
8480 assert(MulResult.size() == 2);
8481
8482 auto [RemL, RemH] =
8483 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8484
8485 Result.push_back(RemL);
8486 Result.push_back(RemH);
8487 }
8488
8489 return true;
8490}
8491
8494 EVT HiLoVT, SelectionDAG &DAG,
8495 SDValue LL, SDValue LH) const {
// Driver for expanding a division/remainder by a constant on split halves.
// Returns false (no expansion) for signed opcodes, a non-constant divisor,
// when optimizing for size, or for a divisor of 0 or 1. Otherwise it tries
// the UREM-decomposition strategy first and the UMulHi magic-number strategy
// second, returning true as soon as one of them succeeds.
8496 unsigned Opcode = N->getOpcode();
8497
8498 // TODO: Support signed division/remainder.
8499 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8500 return false;
8501 assert(
8502 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8503 "Unexpected opcode");
8504
8505 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8506 if (!CN)
8507 return false;
8508
8509 APInt Divisor = CN->getAPIntValue();
8510
8511 // We depend on the UREM by constant optimization in DAGCombiner that requires
8512 // high multiply.
8513 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8515 return false;
8516
8517 // Don't expand if optimizing for size.
8518 if (DAG.shouldOptForSize())
8519 return false;
8520
8521 // Early out for 0 or 1 divisors.
8522 if (Divisor.ule(1))
8523 return false;
8524
// Preferred strategy: remainder by summing chunks of the dividend.
8525 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8526 DAG, LL, LH))
8527 return true;
8528
// Fallback strategy: multiply by a magic constant and take the high part.
8529 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8530 LH))
8531 return true;
8532
8533 return false;
8534}
8535
8536// Check that (every element of) Z is undef or not an exact multiple of BW.
8537static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
// The predicate accepts a null constant pointer (presumably how an undef
// element is presented, given AllowUndefs=true -- confirm against the
// matcher) or a constant whose value is not a multiple of BW.
8539 Z,
8540 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8541 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8542}
8543
// Expands a vector-predicated funnel shift into VP shift and VP_OR nodes.
// Operands are read as X, Y (the values), Z (the shift amount), Mask and VL;
// Mask and VL are passed through to every VP node created here. The node is
// treated as a left funnel shift when its opcode is ISD::VP_FSHL and as a
// right funnel shift otherwise.
8545 EVT VT = Node->getValueType(0);
8546 SDValue ShX, ShY;
8547 SDValue ShAmt, InvShAmt;
8548 SDValue X = Node->getOperand(0);
8549 SDValue Y = Node->getOperand(1);
8550 SDValue Z = Node->getOperand(2);
8551 SDValue Mask = Node->getOperand(3);
8552 SDValue VL = Node->getOperand(4);
8553
8554 unsigned BW = VT.getScalarSizeInBits();
8555 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8556 SDLoc DL(SDValue(Node, 0));
8557
8558 EVT ShVT = Z.getValueType();
// When Z is known never to be a multiple of BW, both shift amounts are in
// range and a plain two-shift form suffices.
8559 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8560 // fshl: X << C | Y >> (BW - C)
8561 // fshr: X << (BW - C) | Y >> C
8562 // where C = Z % BW is not zero
8563 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8564 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8565 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8566 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8567 VL);
8568 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8569 VL);
8570 } else {
// Otherwise the inverse shift is split into a shift by 1 followed by a shift
// by (BW - 1 - (Z % BW)), so no single shift amount ever reaches BW.
8571 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8572 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8573 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8574 if (isPowerOf2_32(BW)) {
8575 // Z % BW -> Z & (BW - 1)
8576 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8577 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8578 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8579 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8580 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8581 } else {
8582 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8583 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8584 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8585 }
8586
8587 SDValue One = DAG.getConstant(1, DL, ShVT);
8588 if (IsFSHL) {
8589 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8590 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8591 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8592 } else {
8593 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8594 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8595 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8596 }
8597 }
8598 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8599}
8600
8602 SelectionDAG &DAG) const {
// Expands a funnel shift node. VP opcodes are delegated to
// expandVPFunnelShift. Otherwise the node is either rewritten as a funnel
// shift in the opposite direction (when only that direction is legal or
// custom and the element width is a power of two) or lowered to
// SHL / SRL / OR. An empty SDValue is returned when the vector legality check
// below fails.
8603 if (Node->isVPOpcode())
8604 return expandVPFunnelShift(Node, DAG);
8605
8606 EVT VT = Node->getValueType(0);
8607
8608 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8612 return SDValue();
8613
8614 SDValue X = Node->getOperand(0);
8615 SDValue Y = Node->getOperand(1);
8616 SDValue Z = Node->getOperand(2);
8617
8618 unsigned BW = VT.getScalarSizeInBits();
8619 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8620 SDLoc DL(SDValue(Node, 0));
8621
8622 EVT ShVT = Z.getValueType();
8623
8624 // If a funnel shift in the other direction is more supported, use it.
8625 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8626 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8627 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8628 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8629 // fshl X, Y, Z -> fshr X, Y, -Z
8630 // fshr X, Y, Z -> fshl X, Y, -Z
8631 Z = DAG.getNegative(Z, DL, ShVT);
8632 } else {
// Z may be a multiple of BW here, so pre-shift the inputs by one bit with
// the reverse opcode and use ~Z as the remaining amount.
8633 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8634 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8635 SDValue One = DAG.getConstant(1, DL, ShVT);
8636 if (IsFSHL) {
8637 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8638 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8639 } else {
8640 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8641 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8642 }
8643 Z = DAG.getNOT(DL, Z, ShVT);
8644 }
8645 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8646 }
8647
8648 SDValue ShX, ShY;
8649 SDValue ShAmt, InvShAmt;
8650 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8651 // fshl: X << C | Y >> (BW - C)
8652 // fshr: X << (BW - C) | Y >> C
8653 // where C = Z % BW is not zero
8654 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8655 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8656 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8657 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8658 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8659 } else {
// The inverse shift is split into a shift by 1 plus a shift by
// (BW - 1 - (Z % BW)) so that no single shift amount ever reaches BW.
8660 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8661 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8662 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8663 if (isPowerOf2_32(BW)) {
8664 // Z % BW -> Z & (BW - 1)
8665 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8666 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8667 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8668 } else {
8669 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8670 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8671 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8672 }
8673
8674 SDValue One = DAG.getConstant(1, DL, ShVT);
8675 if (IsFSHL) {
8676 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8677 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8678 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8679 } else {
8680 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8681 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8682 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8683 }
8684 }
8685 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8686}
8687
8688// TODO: Merge with expandFunnelShift.
8690 SelectionDAG &DAG) const {
// Expands a rotate node (left when the opcode is ISD::ROTL, right otherwise).
// Operand 0 is the value and operand 1 the rotate amount. The result is
// either a rotate in the opposite direction by the negated amount, or an OR
// of two opposing shifts. An empty SDValue is returned when the
// AllowVectorOps / vector check below rejects the expansion.
8691 EVT VT = Node->getValueType(0);
8692 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8693 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8694 SDValue Op0 = Node->getOperand(0);
8695 SDValue Op1 = Node->getOperand(1);
8696 SDLoc DL(SDValue(Node, 0));
8697
8698 EVT ShVT = Op1.getValueType();
8699 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8700
8701 // If a rotate in the other direction is more supported, use it.
8702 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8703 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8704 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8705 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8706 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8707 }
8708
8709 if (!AllowVectorOps && VT.isVector() &&
8715 return SDValue();
8716
// ShOpc shifts in the rotate direction; HsOpc shifts the opposite way to
// recover the bits that wrap around.
8717 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8718 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8719 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8720 SDValue ShVal;
8721 SDValue HsVal;
8722 if (isPowerOf2_32(EltSizeInBits)) {
8723 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8724 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8725 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8726 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8727 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8728 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8729 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8730 } else {
// Non-power-of-two width: reduce the amount with UREM and split the
// opposite shift into a shift by 1 plus a shift by (w - 1 - (c % w)), so no
// single shift amount ever reaches w.
8731 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8732 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8733 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8734 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8735 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8736 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8737 SDValue One = DAG.getConstant(1, DL, ShVT);
8738 HsVal =
8739 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8740 }
8741 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8742}
8743
8744/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8745/// a chain of halving decompositions (halving element width) and/or vector
8746/// widening (doubling element count). This guides expansion strategy selection:
8747/// if true, the halving/widening path produces better code than bit-by-bit.
8748///
8749/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8750/// Widening steps are cheap (O(1) pad/extract) and don't count.
8751/// Limiting halvings to 2 prevents exponential blowup:
8752/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8753/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8754/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8756 EVT VT, unsigned HalveDepth = 0,
8757 unsigned TotalDepth = 0) {
// Recursion guards: give up after more than 2 halving steps, more than 8
// steps in total, or for anything that is not a fixed-length vector.
8758 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8759 return false;
8761 return true;
// An illegal type cannot serve as an intermediate step of the search.
8762 if (!TLI.isTypeLegal(VT))
8763 return false;
8764
8765 unsigned BW = VT.getScalarSizeInBits();
8766
8767 // Halve: halve element width, same element count.
8768 // This is the expensive step -- each halving creates ~4x more operations.
8769 if (BW % 2 == 0) {
8770 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8771 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8772 if (TLI.isTypeLegal(HalfVT) &&
8773 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8774 return true;
8775 }
8776
8777 // Widen: double element count (fixed-width vectors only).
8778 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
// Widening leaves HalveDepth unchanged and only advances TotalDepth.
8779 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8780 if (TLI.isTypeLegal(WideVT) &&
8781 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8782 return true;
8783
8784 return false;
8785}
8786
8788 SDLoc DL(Node);
8789 EVT VT = Node->getValueType(0);
8790 SDValue X = Node->getOperand(0);
8791 SDValue Y = Node->getOperand(1);
8792 unsigned BW = VT.getScalarSizeInBits();
8793 unsigned Opcode = Node->getOpcode();
8794 LLVMContext &Ctx = *DAG.getContext();
8795
8796 switch (Opcode) {
8797 case ISD::CLMUL: {
8798 // For vector types, try decomposition strategies that leverage legal
8799 // CLMUL on narrower or wider element types, avoiding the expensive
8800 // bit-by-bit expansion.
8801 if (VT.isVector()) {
8802 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8803 // Applies ExpandIntRes_CLMUL's identity element-wise:
8804 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8805 // where:
8806 // Lo = CLMUL(XLo, YLo)
8807 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8808 unsigned HalfBW = BW / 2;
8809 if (BW % 2 == 0) {
8810 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8811 EVT HalfVT =
8812 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8813 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8814 /*HalveDepth=*/1)) {
8815 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8816
8817 // Extract low and high halves of each element.
8818 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8819 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8820 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8821 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8822 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8823 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8824
8825 // Lo = CLMUL(XLo, YLo)
8826 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8827
8828 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8829 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8830 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8831 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8832 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8833 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8834
8835 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8836 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8837 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8838 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8839 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8840 }
8841 }
8842
8843 // Strategy 2: Promote to double-element-width CLMUL.
8844 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8845 {
8846 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8847 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8848 // If CLMUL on ExtVT is Custom (not Legal), the target may
8849 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8850 // fallback costs O(BW) vectorized iterations. Only widen when
8851 // element count is small enough that scalarization is cheaper.
8852 unsigned NumElts = VT.getVectorMinNumElements();
8853 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8854 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8855 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8856 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8857 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8858 }
8859 }
8860 }
8861
8862 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8863 // vector, extract lower result). CLMUL is element-wise, so upper
8864 // (undef) lanes don't affect the lower results.
8865 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8866 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8867 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8868 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8869 SDValue Undef = DAG.getUNDEF(WideVT);
8870 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8871 X, DAG.getVectorIdxConstant(0, DL));
8872 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8873 Y, DAG.getVectorIdxConstant(0, DL));
8874 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8875 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8876 DAG.getVectorIdxConstant(0, DL));
8877 }
8878 }
8879 }
8880
8881 // Special case: clmul(X, ~0) is equivalent to a "parallel prefix XOR" or
8882 // "bitwise parity" operation.
8884 SDValue R = X;
8885 for (unsigned I = 1; I < BW; I <<= 1) {
8886 SDValue ShAmt = DAG.getShiftAmountConstant(I, VT, DL);
8887 SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, R, ShAmt);
8888 R = DAG.getNode(ISD::XOR, DL, VT, R, Shifted);
8889 }
8890 return R;
8891 }
8892
8893 // NOTE: If you change this expansion, please update the cost model
8894 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8895 // Intrinsic::clmul.
8896
8897 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8898
8899 SDValue Res = DAG.getConstant(0, DL, VT);
8900 for (unsigned I = 0; I < BW; ++I) {
8901 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8902 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8903 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8904
8905 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8906 // multiply, use a shift-based expansion to avoid expensive MUL
8907 // instructions.
8908 SDValue Part;
8909 if (!hasBitTest(Y, ShiftAmt) &&
8912 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8913 } else {
8914 // Canonical bit test: (Y & (1 << I)) != 0
8915 SDValue Zero = DAG.getConstant(0, DL, VT);
8916 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8917 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8918 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8919 }
8920 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8921 }
8922 return Res;
8923 }
8924 case ISD::CLMULR:
8925 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8928 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8929 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8930 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8931 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8932 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8933 DAG.getShiftAmountConstant(1, VT, DL));
8934 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8935 }
8936 [[fallthrough]];
8937 case ISD::CLMULH: {
8938 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8939 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8940 // when any of these hold:
8941 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8942 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8943 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8944 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8945 // path creates CLMUL(VT) which will be expanded efficiently. The
8946 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8947 // causing a cycle.
8948 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8949 // => trunc path is preferred over the bitreverse path, as it avoids the
8950 // cost of 3 bitreverse operations.
8955 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8956 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8957 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8958 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8959 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8960 if (Opcode == ISD::CLMULH)
8961 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8962 DAG.getShiftAmountConstant(1, VT, DL));
8963 return Res;
8964 }
8965 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8966 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8967 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8968 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8969 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8970 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8971 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8972 }
8973 }
8974 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8975}
8976
8978 SelectionDAG &DAG) const {
// Expands a three-operand "parts" shift (operand 0 = low half, operand 1 =
// high half, operand 2 = shift amount) into a funnel shift, a single-half
// shift and two selects, assigning the resulting halves to Lo and Hi.
// NOTE(review): listing line 8977 (the start of this signature) is absent
// from this extract; Lo and Hi are presumably reference out-parameters
// declared there -- confirm against the full file.
8979 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8980 EVT VT = Node->getValueType(0);
8981 unsigned VTBits = VT.getScalarSizeInBits();
8982 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8983
// An opcode that is neither SHL_PARTS nor SRA_PARTS takes the logical
// right-shift path below (IsSHL and IsSRA both false).
8984 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8985 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8986 SDValue ShOpLo = Node->getOperand(0);
8987 SDValue ShOpHi = Node->getOperand(1);
8988 SDValue ShAmt = Node->getOperand(2);
8989 EVT ShAmtVT = ShAmt.getValueType();
8990 EVT ShAmtCCVT =
8991 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8992 SDLoc dl(Node);
8993
8994 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8995 // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually optimized
8996 // away during isel.
8997 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8998 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
// Tmp1 is the fill value for the half that is shifted out entirely when the
// amount is large: the high half arithmetically shifted by VTBits - 1 for
// SRA_PARTS, and zero otherwise.
8999 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
9000 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
9001 : DAG.getConstant(0, dl, VT);
9002
// Tmp2 is the funnel shift across both halves (uses the unmasked amount);
// Tmp3 is the shift of a single half by the masked amount.
9003 SDValue Tmp2, Tmp3;
9004 if (IsSHL) {
9005 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
9006 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
9007 } else {
9008 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
9009 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
9010 }
9011
9012 // If the shift amount is greater than or equal to the width of a part we
9013 // don't use the result from the FSHL/FSHR. Insert a test and select the
9014 // appropriate values for large shift amounts.
// VTBits is a power of two (asserted above), so ANDing with VTBits isolates
// the single bit of the shift amount whose value is VTBits.
9015 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9016 DAG.getConstant(VTBits, dl, ShAmtVT));
9017 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
9018 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
9019
// Cond true (large amount): the single-half shift result (Tmp3) moves to the
// other half and the vacated half takes the fill value (Tmp1).
// Cond false (small amount): the funnel shift (Tmp2) and Tmp3 are used as is.
9020 if (IsSHL) {
9021 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9022 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9023 } else {
9024 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9025 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9026 }
9027}
9028
9030 SelectionDAG &DAG) const {
// NOTE(review): listing line 9029 (the start of this signature) is absent
// from this extract.
9031 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9032 // suggested in
9033 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9034 // It uses strict_fp operations even outside a strict_fp context in order
9035 // to guarantee that the canonicalization is not optimized away by later
9036 // passes. The result chain introduced by that is intentionally ignored
9037 // since no ordering requirement is intended here.
9038 EVT VT = Node->getValueType(0);
9039 SDLoc DL(Node);
// Start from the original node's flags and additionally set the
// no-FP-exception flag on the multiply.
9040 SDNodeFlags Flags = Node->getFlags();
9041 Flags.setNoFPExcept(true);
9042 SDValue One = DAG.getConstantFP(1.0, DL, VT);
// The strict multiply takes the DAG entry node as its chain operand; only
// the node itself is returned, its chain result is not threaded anywhere.
9043 SDValue Mul =
9044 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
9045 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
9046 return Mul;
9047}
9048
/// Expands a CONVERT_FROM_ARBITRARY_FP node with integer bit manipulation.
/// Operand 0 holds the raw bits of the source value and constant operand 1
/// holds an APFloatBase::Semantics enum value naming the source format. The
/// result is the node's floating-point result type, produced by a BITCAST of
/// the assembled integer. Returns an empty SDValue after emitting an error if
/// the source format is not handled by the switch below.
/// NOTE(review): listing line 9050 (the line carrying the function name and
/// first parameters) is absent from this extract.
9049SDValue
9051 SelectionDAG &DAG) const {
9052 SDLoc dl(Node);
9053 EVT DstVT = Node->getValueType(0);
9054 EVT DstScalarVT = DstVT.getScalarType();
9055
9056 SDValue IntVal = Node->getOperand(0);
9057 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9058 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9059
9060 // Supported source formats.
9061 switch (Sem) {
// NOTE(review): listing lines 9062-9066 are missing here; presumably they
// are the case labels of the accepted formats -- confirm in the full file.
9067 break;
9068 default:
9069 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
9070 "source format (semantics enum " +
9071 Twine(SemEnum) + ")");
9072 return SDValue();
9073 }
9074
// Source format parameters. SrcMant is the precision minus one, SrcExp is
// what remains of the total width after the mantissa and one sign bit, and
// SrcBias is derived from the minimum exponent.
9075 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
9076 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9077 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9078 const unsigned SrcMant = SrcPrecision - 1;
9079 const unsigned SrcExp = SrcBits - SrcMant - 1;
9080 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9081 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9082
9083 // Destination format parameters.
9084 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9085 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9086 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9087 const unsigned DstExpBits = DstBits - DstMant - 1;
9088 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9089 const int DstBias = 1 - DstMinExp;
9090 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
9091
9092 // Work in an integer type matching the destination float width.
9093 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
9094 EVT IntVT = DstVT.isVector()
9095 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
9096 DstVT.getVectorElementCount())
9097 : IntScalarVT;
9098
9099 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
9100
9101 EVT SetCCVT =
9102 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9103
9104 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9105 SDValue One = DAG.getConstant(1, dl, IntVT);
9106
9107 // Extract bit fields.
9108 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9109 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9110
9111 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
9112 DAG.getConstant(MantMask, dl, IntVT));
9113
9114 SDValue ExpField =
9115 DAG.getNode(ISD::AND, dl, IntVT,
9116 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9117 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
9118 DAG.getConstant(ExpMask, dl, IntVT));
9119
// The sign is taken as Src >> (SrcBits - 1) without a mask.
// NOTE(review): this yields a single bit only if Src has no bits set above
// the source width -- confirm the operand is guaranteed to be clean.
9120 SDValue SignBit =
9121 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9122 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9123
// Move the sign to the top bit of the destination-width integer.
9124 SDValue SignShifted =
9125 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9126 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
9127
9128 // Classify the input.
9129 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
9130 SDValue IsExpAllOnes =
9131 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
9132 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
9133 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
9134 SDValue IsMantNonZero =
9135 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
9136
// NaN detection depends on the source format's non-finite behavior:
// FiniteOnly never reports NaN, IEEE754 reports it for an all-ones exponent
// with a non-zero mantissa, and the remaining behavior reports it only when
// exponent and mantissa are both all ones.
9137 SDValue IsNaN;
9138 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9139 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9140 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9141 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
9142 } else {
// NOTE(review): listing line 9143 is missing from this extract.
9144 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
9145 SDValue IsMantAllOnes =
9146 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
9147 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
9148 }
9149
// Only the IEEE754 behavior has an infinity encoding here.
9150 SDValue IsInf;
9151 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9152 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
9153 else
9154 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9155
9156 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
9157 SDValue IsDenorm =
9158 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
9159
9160 // Normal value conversion.
// Re-bias the exponent by the (signed) difference of the two biases.
9161 const int BiasAdjust = DstBias - SrcBias;
9162 SDValue NormDstExp =
9163 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
9164 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
9165
// Left-align the mantissa when the destination has more mantissa bits;
// otherwise the mantissa field is used unchanged (no narrowing shift).
9166 SDValue NormDstMant;
9167 if (DstMant > SrcMant) {
9168 SDValue NormDstMantShift =
9169 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9170 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
9171 } else {
9172 NormDstMant = MantField;
9173 }
9174
9175 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
9176 SDValue NormExpShifted =
9177 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
9178 SDValue NormResult =
9179 DAG.getNode(ISD::OR, dl, IntVT,
9180 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
9181 NormDstMant);
9182
9183 // Denormal value conversion.
// The source denormal is renormalized: locate the highest set mantissa bit,
// drop it (it becomes the implicit leading one), shift the remaining
// fraction up to the top of the destination mantissa, and derive the
// exponent from that bit's position. DenormResult is only selected below
// when IsDenorm holds, i.e. when MantField is non-zero.
9184 SDValue DenormResult;
9185 {
9186 const unsigned IntVTBits = DstBits;
9187 SDValue LeadingZeros =
9188 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField);
9189
// Destination exponent = DenormExpConst - LeadingZeros, which equals
// (MantMSB + 1) + DstBias - SrcBias - SrcMant.
9190 const int DenormExpConst =
9191 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9192 SDValue DenormDstExp = DAG.getNode(
9193 ISD::SUB, dl, IntVT,
9194 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
9195 LeadingZeros);
9196
// MantMSB is the bit index of the highest set bit of MantField.
9197 SDValue MantMSB =
9198 DAG.getNode(ISD::SUB, dl, IntVT,
9199 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
9200
// XOR with the isolated leading one clears that bit from the mantissa.
9201 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
9202 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
9203
// ShiftAmount = LeadingZeros - ShiftSub, which equals DstMant - MantMSB.
9204 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9205 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
9206 DAG.getConstant(ShiftSub, dl, IntVT));
9207
9208 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
9209
9210 SDValue DenormExpShifted =
9211 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
9212 DenormResult = DAG.getNode(
9213 ISD::OR, dl, IntVT,
9214 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9215 DenormDstMant);
9216 }
9217
9218 SDValue FiniteResult =
9219 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
9220
// NaN result: all-ones exponent plus the top mantissa bit. The source sign
// is not carried into this constant.
9221 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9222 SDValue NaNResult =
9223 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
9224
// Infinity result: all-ones exponent, zero mantissa, source sign kept.
9225 SDValue InfResult =
9226 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9227 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
9228
// Zero result: only the sign bit, so the sign of a zero input is kept.
9229 SDValue ZeroResult = SignShifted;
9230
// Later selects take priority: NaN overrides Inf, which overrides Zero,
// which overrides the finite (normal/denormal) result.
9231 SDValue Result = FiniteResult;
9232 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9233 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9234 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9235
9236 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
9237}
9238
9240 SelectionDAG &DAG) const {
// Expands a non-strict f32 -> i64 signed conversion into integer operations
// on the float's bit pattern. Returns false (leaving Result untouched) for
// any other type pair and for strict FP opcodes; on success assigns Result
// and returns true.
// NOTE(review): listing line 9239 (the start of this signature) is absent
// from this extract; Result is presumably a reference out-parameter.
// For strict opcodes operand 0 is skipped, so the source is operand 1.
9241 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9242 SDValue Src = Node->getOperand(OpNo);
9243 EVT SrcVT = Src.getValueType();
9244 EVT DstVT = Node->getValueType(0);
9245 SDLoc dl(SDValue(Node, 0));
9246
9247 // FIXME: Only f32 to i64 conversions are supported.
9248 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9249 return false;
9250
9251 if (Node->isStrictFPOpcode())
9252 // When a NaN is converted to an integer a trap is allowed. We can't
9253 // use this expansion here because it would eliminate that trap. Other
9254 // traps are also allowed and cannot be eliminated. See
9255 // IEEE 754-2008 sec 5.8.
9256 return false;
9257
9258 // Expand f32 -> i64 conversion
9259 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9260 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9261 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9262 EVT IntVT = SrcVT.changeTypeToInteger();
9263 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9264
// f32 layout constants: exponent mask and its lowest bit position (23),
// exponent bias (127), sign mask and position, and the mantissa mask.
9265 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
9266 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
9267 SDValue Bias = DAG.getConstant(127, dl, IntVT);
9268 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
9269 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
9270 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
9271
9272 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
9273
// Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
9274 SDValue ExponentBits = DAG.getNode(
9275 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
9276 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
9277 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
9278
// Arithmetic shift of the isolated sign bit by (width - 1) gives 0 or all
// ones; it is then sign-extended to the destination type.
9279 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
9280 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
9281 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
9282 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
9283
// Significand = mantissa bits with bit 23 (0x00800000) ORed in.
9284 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
9285 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
9286 DAG.getConstant(0x00800000, dl, IntVT));
9287
9288 R = DAG.getZExtOrTrunc(R, dl, DstVT);
9289
// Scale the significand: shift left by (Exponent - 23) when Exponent > 23,
// otherwise shift right by (23 - Exponent).
9290 R = DAG.getSelectCC(
9291 dl, Exponent, ExponentLoBit,
9292 DAG.getNode(ISD::SHL, dl, DstVT, R,
9293 DAG.getZExtOrTrunc(
9294 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
9295 dl, IntShVT)),
9296 DAG.getNode(ISD::SRL, dl, DstVT, R,
9297 DAG.getZExtOrTrunc(
9298 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
9299 dl, IntShVT)),
9300 ISD::SETGT);
9301
// (R ^ Sign) - Sign negates R when Sign is all ones and leaves it unchanged
// when Sign is zero.
9302 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
9303 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
9304
// A negative unbiased exponent selects a result of 0.
9305 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
9306 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
9307 return true;
9308}
9309
9311 SDValue &Chain,
9312 SelectionDAG &DAG) const {
// Expands an FP -> unsigned integer conversion in terms of FP_TO_SINT (or
// its strict variant). On success assigns Result (and Chain for strict
// opcodes) and returns true; returns false when the expansion is declined.
// NOTE(review): listing line 9310 (the start of this signature) is absent
// from this extract; Result is presumably a reference out-parameter.
9313 SDLoc dl(SDValue(Node, 0));
9314 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9315 SDValue Src = Node->getOperand(OpNo);
9316
9317 EVT SrcVT = Src.getValueType();
9318 EVT DstVT = Node->getValueType(0);
9319 EVT SetCCVT =
9320 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9321 EVT DstSetCCVT =
9322 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
9323
9324 // Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): listing lines 9326 and 9328 (the rest of the next two
// statements) are missing from this extract.
9325 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9327 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
9329 return false;
9330
9331 // If the maximum float value is smaller than the signed integer range,
9332 // the destination signmask can't be represented by the float, so we can
9333 // just use FP_TO_SINT directly.
9334 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9335 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
9336 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
// NOTE(review): listing line 9337 (the start of this condition) is missing.
// APF is converted in place from SignMask as part of the condition, so it
// holds the floating-point form of the sign mask for the code further down.
9338 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9339 if (Node->isStrictFPOpcode()) {
9340 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9341 { Node->getOperand(0), Src });
9342 Chain = Result.getValue(1);
9343 } else
9344 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9345 return true;
9346 }
9347
9348 // Don't expand it if there isn't a cheap fsub instruction.
// NOTE(review): listing line 9349 (the start of this condition) is missing.
9350 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9351 return false;
9352
// Cst is the sign mask as an FP constant; Sel is true when Src < Cst, i.e.
// when Src can be converted by FP_TO_SINT without an offset.
9353 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
9354 SDValue Sel;
9355
9356 if (Node->isStrictFPOpcode()) {
9357 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9358 Node->getOperand(0), /*IsSignaling*/ true);
9359 Chain = Sel.getValue(1);
9360 } else {
9361 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9362 }
9363
// The single-conversion form below is used for strict opcodes and whenever
// the target hook asks for it.
9364 bool Strict = Node->isStrictFPOpcode() ||
9365 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9366
9367 if (Strict) {
9368 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9369 // signmask then offset (the result of which should be fully representable).
9370 // Sel = Src < 0x8000000000000000
9371 // FltOfs = select Sel, 0, 0x8000000000000000
9372 // IntOfs = select Sel, 0, 0x8000000000000000
9373 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9374
9375 // TODO: Should any fast-math-flags be set for the FSUB?
9376 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9377 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
// Sel was computed with the source type's setcc type; convert it before
// using it to select between destination-typed integers.
9378 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9379 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9380 DAG.getConstant(0, dl, DstVT),
9381 DAG.getConstant(SignMask, dl, DstVT));
9382 SDValue SInt;
9383 if (Node->isStrictFPOpcode()) {
// Chain the strict subtract after the compare, and the strict conversion
// after the subtract.
9384 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9385 { Chain, Src, FltOfs });
9386 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9387 { Val.getValue(1), Val });
9388 Chain = SInt.getValue(1);
9389 } else {
9390 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9391 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9392 }
9393 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9394 } else {
9395 // Expand based on maximum range of FP_TO_SINT:
9396 // True = fp_to_sint(Src)
9397 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9398 // Result = select (Src < 0x8000000000000000), True, False
9399
9400 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9401 // TODO: Should any fast-math-flags be set for the FSUB?
9402 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9403 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
// The "+ 0x8000000000000000" from the formula above is applied as an XOR
// with the sign mask.
9404 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9405 DAG.getConstant(SignMask, dl, DstVT));
9406 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9407 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9408 }
9409 return true;
9410}
9411
9413 SDValue &Chain, SelectionDAG &DAG) const {
// Expands an unsigned integer -> FP conversion. Returns false when the
// expansion is declined (strict opcodes, unsupported type pairs, missing
// vector operations); otherwise assigns Result and returns true.
// NOTE(review): listing line 9412 (the start of this signature) is absent
// from this extract; Result is presumably a reference out-parameter.
9414 // This transform is not correct for converting 0 when rounding mode is set
9415 // to round toward negative infinity which will produce -0.0. So disable
9416 // under strictfp.
9417 if (Node->isStrictFPOpcode())
9418 return false;
9419
9420 SDValue Src = Node->getOperand(0);
9421 EVT SrcVT = Src.getValueType();
9422 EVT DstVT = Node->getValueType(0);
9423
9424 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9425 // it.
// NOTE(review): listing line 9427 (the rest of this condition) is missing.
9426 if (Node->getFlags().hasNonNeg() &&
9428 Result =
9429 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9430 return true;
9431 }
9432
// The bit-pattern algorithm below only handles i64 -> f64 element types.
9433 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9434 return false;
9435
9436 // Only expand vector types if we have the appropriate vector bit
9437 // operations.
// NOTE(review): listing lines 9439-9442 (the rest of this condition) are
// missing from this extract.
9438 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
9443 return false;
9444
9445 SDLoc dl(SDValue(Node, 0));
9446
9447 // Implementation of unsigned i64 to f64 following the algorithm in
9448 // __floatundidf in compiler_rt. This implementation performs rounding
9449 // correctly in all rounding modes with the exception of converting 0
9450 // when rounding toward negative infinity. In that case the fsub will
9451 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9452 // incorrect.
// The integer constants are f64 bit patterns: 0x433... encodes 2^52,
// 0x453... encodes 2^84, and 0x4530000000100000 encodes 2^84 + 2^52.
9453 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9454 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9455 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9456 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9457 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9458 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9459
// Split the source into 32-bit halves and OR each into the mantissa of one
// of the constants: LoFlt reads as 2^52 + Lo and HiFlt as 2^84 + Hi * 2^32.
9460 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
9461 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
9462 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
9463 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
9464 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9465 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
// HiSub = Hi * 2^32 - 2^52, so LoFlt + HiSub = Lo + Hi * 2^32; the two 2^52
// offsets cancel in the final add.
9466 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9467 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
9468 return true;
9469}
9470
/// Builds a select_cc implementing FMINNUM/FMAXNUM when the node carries the
/// no-NaNs flag: select_cc(Op1, Op2, Op1, Op2, SETLT or SETGT). Returns an
/// empty SDValue when the flag is absent or the vector check below fails.
/// NOTE(review): listing line 9472 (the line carrying the function name and
/// first parameters) is absent from this extract.
9471SDValue
9473 SelectionDAG &DAG) const {
9474 unsigned Opcode = Node->getOpcode();
9475 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9476 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9477 "Wrong opcode");
9478
9479 if (Node->getFlags().hasNoNaNs()) {
// NOTE(review): the predicate is chosen by comparing against ISD::FMINNUM
// only, so STRICT_FMINNUM (accepted by the assert above) would get SETGT,
// and operands 0/1 are read without skipping a chain operand -- confirm
// whether strict opcodes can actually reach this path.
9480 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9481 EVT VT = Node->getValueType(0);
// NOTE(review): listing line 9483 (the second half of this condition) is
// missing from this extract. The bail-out only applies to vector types.
9482 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9484 VT.isVector())
9485 return SDValue();
9486 SDValue Op1 = Node->getOperand(0);
9487 SDValue Op2 = Node->getOperand(1);
9488 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9489 Node->getFlags());
9490 }
9491
9492 return SDValue();
9493}
9494
9496 SelectionDAG &DAG) const {
// Expands FMINNUM/FMAXNUM by trying, in order: splitting the vector
// operation, the opcode held in NewOp (with operands quieted where needed),
// FMINIMUM/FMAXIMUM when no NaNs are possible, and finally a select-based
// form. Returns an empty SDValue if none applies.
// NOTE(review): listing line 9495 (the start of this signature) is absent
// from this extract.
9497 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9498 return Expanded;
9499
9500 EVT VT = Node->getValueType(0);
// NOTE(review): listing line 9502 (the call that takes this message) is
// missing from this extract.
9501 if (VT.isScalableVector())
9503 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9504
9505 SDLoc dl(Node);
// NOTE(review): listing line 9507 (the initializer of NewOp) is missing.
9506 unsigned NewOp =
9508
9509 if (isOperationLegalOrCustom(NewOp, VT)) {
9510 SDValue Quiet0 = Node->getOperand(0);
9511 SDValue Quiet1 = Node->getOperand(1);
9512
9513 if (!Node->getFlags().hasNoNaNs()) {
9514 // Insert canonicalizes if it's possible we need to quiet to get correct
9515 // sNaN behavior.
9516 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9517 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9518 Node->getFlags());
9519 }
9520 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9521 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9522 Node->getFlags());
9523 }
9524 }
9525
9526 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9527 }
9528
9529 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9530 // instead if there are no NaNs.
9531 if (Node->getFlags().hasNoNaNs() ||
9532 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9533 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9534 unsigned IEEE2018Op =
9535 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9536 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9537 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9538 Node->getOperand(1), Node->getFlags());
9539 }
9540
// NOTE(review): listing line 9541 (the condition that defines SelCC) is
// missing from this extract.
9542 return SelCC;
9543
9544 return SDValue();
9545}
9546
9548 SelectionDAG &DAG) const {
// Expands FMINIMUM/FMAXIMUM in three steps: compute a min/max that need not
// propagate NaN, then force a NaN result if either operand is NaN, then fix
// up the ordering of -0.0 and +0.0 where the first step does not guarantee
// it.
// NOTE(review): listing line 9547 (the start of this signature) is absent
// from this extract.
9549 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9550 return Expanded;
9551
9552 SDLoc DL(N);
9553 SDValue LHS = N->getOperand(0);
9554 SDValue RHS = N->getOperand(1);
9555 unsigned Opc = N->getOpcode();
9556 EVT VT = N->getValueType(0);
9557 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9558 bool IsMax = Opc == ISD::FMAXIMUM;
9559 SDNodeFlags Flags = N->getFlags();
9560
9561 // First, implement comparison not propagating NaN. If no native fmin or fmax
9562 // available, use plain select with setcc instead.
// NOTE(review): listing line 9563 is missing here; presumably it declares
// MinMax, which is assigned below -- confirm in the full file.
9564 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9565 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9566
9567 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9568 // signed zero behavior.
9569 bool MinMaxMustRespectOrderedZero = false;
9570
// Only the *_IEEE path sets the flag that lets the signed-zero fixup at the
// end be skipped.
9571 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9572 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
9573 MinMaxMustRespectOrderedZero = true;
9574 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9575 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9576 } else {
// NOTE(review): listing line 9577 (the condition guarding the unroll) is
// missing from this extract.
9578 return DAG.UnrollVectorOp(N);
9579
9580 // NaN (if exists) will be propagated later, so ordering doesn't matter.
9581 SDValue Compare =
9582 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9583 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9584 }
9585
9586 // Propagate a NaN from either operand: if LHS and RHS compare unordered,
// the result is replaced by the NaN constant built below.
9587 if (!N->getFlags().hasNoNaNs() &&
9588 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
// NOTE(review): listing line 9590 (the rest of this initializer) is missing.
9589 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
9591 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9592 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9593 }
9594
9595 // fminimum/fmaximum requires -0.0 less than +0.0
// When the result compares equal to zero, prefer whichever operand is the
// "winning" zero (+0.0 for max, -0.0 for min); RHS is checked last and so
// takes priority over LHS.
9596 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9597 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
9598 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9599 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
9600 SDValue TestZero =
9601 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9602 SDValue LCmp = DAG.getSelect(
9603 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9604 MinMax, Flags);
9605 SDValue RCmp = DAG.getSelect(
9606 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9607 LCmp, Flags);
9608 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
9609 }
9610
9611 return MinMax;
9612}
9613
9615 SelectionDAG &DAG) const {
// Expands FMINIMUMNUM/FMAXIMUMNUM. Tries progressively weaker native
// operations (NewOp, FMINIMUM/FMAXIMUM, FMINNUM/FMAXNUM) when flags or known
// operand properties make them equivalent, and otherwise builds the result
// from selects: replace a NaN operand with the other operand, pick min/max,
// then fix up signed zeros.
// NOTE(review): listing line 9614 (the start of this signature) is absent
// from this extract.
9616 SDLoc DL(Node);
9617 SDValue LHS = Node->getOperand(0);
9618 SDValue RHS = Node->getOperand(1);
9619 unsigned Opc = Node->getOpcode();
9620 EVT VT = Node->getValueType(0);
9621 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9622 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9623 SDNodeFlags Flags = Node->getFlags();
9624
// NOTE(review): listing line 9626 (the initializer of NewOp) is missing.
9625 unsigned NewOp =
9627
9628 if (isOperationLegalOrCustom(NewOp, VT)) {
9629 if (!Flags.hasNoNaNs()) {
9630 // Insert canonicalizes if it's possible we need to quiet to get correct
9631 // sNaN behavior.
9632 if (!DAG.isKnownNeverSNaN(LHS)) {
9633 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9634 }
9635 if (!DAG.isKnownNeverSNaN(RHS)) {
9636 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9637 }
9638 }
9639
9640 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9641 }
9642
9643 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the same
9644 // behavior in all other cases, +0.0 vs -0.0 included.
9645 if (Flags.hasNoNaNs() ||
9646 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
// NOTE(review): listing line 9648 (the initializer of IEEE2019Op) is missing.
9647 unsigned IEEE2019Op =
9649 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9650 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9651 }
9652
9653 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
9654 // either one for +0.0 vs -0.0.
// So it is only used when sNaNs are excluded and at least one operand is
// known not to be a zero (or signed zeros are flagged as irrelevant).
9655 if ((Flags.hasNoNaNs() ||
9656 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9657 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9658 DAG.isKnownNeverLogicalZero(RHS))) {
9659 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9660 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9661 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9662 }
9663
// NOTE(review): listing lines 9665-9666 (the rest of this condition) are
// missing from this extract.
9664 if (VT.isVector() &&
9667 return DAG.UnrollVectorOp(Node);
9668
9669 // If only one operand is NaN, override it with the other operand.
// select_cc(X, X, Other, X, SETUO): X compared with itself is unordered
// exactly when X is NaN. The RHS fixup sees the already-updated LHS.
9670 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9671 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
9672 }
9673 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9674 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
9675 }
9676
9677 // Always prefer RHS if equal.
9678 SDValue MinMax =
9679 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9680
9681 // TODO: We need quiet sNaN if strictfp.
9682
9683 // Fixup signed zero behavior.
9684 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9685 DAG.isKnownNeverLogicalZero(RHS)) {
9686 return MinMax;
9687 }
9688 SDValue TestZero =
9689 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9690 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9691 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
9692 EVT IntVT = VT.changeTypeToInteger();
9693 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
9694 SDValue LHSTrunc = LHS;
// NOTE(review): listing line 9695 (the condition under which LHS is rounded
// to FloatVT for the class test) is missing from this extract.
9696 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9697 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9698 }
9699 // It's OK to select between LHS and MinMax with only one ISD::IS_FPCLASS,
9700 // because RHS was preferred when generating MinMax if the operands are equal.
9701 SDValue RetZero = DAG.getSelect(
9702 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9703 MinMax, Flags);
9704 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9705}
9706
9707/// Returns a true value if this FPClassTest can be performed with an ordered
9708/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9709/// std::nullopt if it cannot be performed as a compare with 0.
///
/// \p Test is the class mask being tested, \p Semantics selects which
/// denormal mode of \p MF is consulted.
9710static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9711 const fltSemantics &Semantics,
9712 const MachineFunction &MF) {
// Split the mask into its non-NaN part and its NaN part. The test is
// "ordered" when no NaN class is included and "unordered" when both NaN
// classes are included.
9713 FPClassTest OrderedMask = Test & ~fcNan;
9714 FPClassTest NanTest = Test & fcNan;
9715 bool IsOrdered = NanTest == fcNone;
9716 bool IsUnordered = NanTest == fcNan;
9717
9718 // Skip cases that are testing for only a qnan or snan.
9719 if (!IsOrdered && !IsUnordered)
9720 return std::nullopt;
9721
// With IEEE handling of denormal inputs, only a test for exactly the zero
// classes maps to a compare with 0.
9722 if (OrderedMask == fcZero &&
9723 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9724 return IsOrdered;
// When the denormal mode reports that inputs are treated as zero, the test
// must cover zero and subnormal together to map to a compare with 0.
9725 if (OrderedMask == (fcZero | fcSubnormal) &&
9726 MF.getDenormalMode(Semantics).inputsAreZero())
9727 return IsOrdered;
9728 return std::nullopt;
9729}
9730
9732 const FPClassTest OrigTestMask,
9733 SDNodeFlags Flags, const SDLoc &DL,
9734 SelectionDAG &DAG) const {
// Emits a ResultVT value that is true when Op belongs to one of the
// floating-point classes selected by OrigTestMask. Two strategies are used:
// a floating-point compare for a handful of simple masks (guarded by
// Flags.hasNoFPExcept()), and otherwise integer tests on the bit pattern of
// Op whose partial results are ORed together.
// NOTE(review): several lines of this function are missing from this listing
// (for example the rest of the hasNoFPExcept() guard), so conditions that are
// not visible here are not described.
9735 EVT OperandVT = Op.getValueType();
9736 assert(OperandVT.isFloatingPoint());
9737 FPClassTest Test = OrigTestMask;
9738
9739 // Degenerated cases.
9740 if (Test == fcNone)
9741 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9742 if (Test == fcAllFlags)
9743 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9744
9745 // PPC double double is a pair of doubles, of which the higher part determines
9746 // the value class.
9747 if (OperandVT == MVT::ppcf128) {
9748 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9749 DAG.getConstant(1, DL, MVT::i32));
9750 OperandVT = MVT::f64;
9751 }
9752
9753 // Floating-point type properties.
9754 EVT ScalarFloatVT = OperandVT.getScalarType();
9755 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9756 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9757 bool IsF80 = (ScalarFloatVT == MVT::f80);
9758
9759 // Some checks can be implemented using float comparisons, if floating point
9760 // exceptions are ignored.
9761 if (Flags.hasNoFPExcept() &&
9763 FPClassTest FPTestMask = Test;
9764 bool IsInvertedFP = false;
9765
9766 if (FPClassTest InvertedFPCheck =
9767 invertFPClassTestIfSimpler(FPTestMask, true)) {
9768 FPTestMask = InvertedFPCheck;
9769 IsInvertedFP = true;
9770 }
9771
// When the mask was inverted above, the inverse compare is emitted instead:
// oeq becomes une and ueq becomes one.
9772 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9773 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9774
9775 // See if we can fold an | fcNan into an unordered compare.
9776 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9777
9778 // Can't fold the ordered check if we're only testing for snan or qnan
9779 // individually.
9780 if ((FPTestMask & fcNan) != fcNan)
9781 OrderedFPTestMask = FPTestMask;
9782
// The compare stays ordered unless the complete fcNan set was stripped from
// the mask just above.
9783 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9784
9785 if (std::optional<bool> IsCmp0 =
9786 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9787 IsCmp0 && (isCondCodeLegalOrCustom(
9788 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9789 OperandVT.getScalarType().getSimpleVT()))) {
9790
9791 // If denormals could be implicitly treated as 0, this is not equivalent
9792 // to a compare with 0 since it will also be true for denormals.
9793 return DAG.getSetCC(DL, ResultVT, Op,
9794 DAG.getConstantFP(0.0, DL, OperandVT),
9795 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9796 }
9797
// A pure NaN test is emitted as a compare of Op with itself (SETUO, or SETO
// when the mask was inverted).
9798 if (FPTestMask == fcNan &&
9800 OperandVT.getScalarType().getSimpleVT()))
9801 return DAG.getSetCC(DL, ResultVT, Op, Op,
9802 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9803
9804 bool IsOrderedInf = FPTestMask == fcInf;
9805 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9806 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9807 : UnorderedCmpOpcode,
9808 OperandVT.getScalarType().getSimpleVT()) &&
9811 (OperandVT.isVector() &&
9813 // isinf(x) --> fabs(x) == inf
9814 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9815 SDValue Inf =
9816 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9817 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9818 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9819 }
9820
9821 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9822 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9823 : UnorderedCmpOpcode,
9824 OperandVT.getSimpleVT())) {
9825 // isposinf(x) --> x == inf
9826 // isneginf(x) --> x == -inf
9827 // isposinf(x) || nan --> x u== inf
9828 // isneginf(x) || nan --> x u== -inf
9829
9830 SDValue Inf = DAG.getConstantFP(
9831 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9832 OperandVT);
9833 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9834 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9835 }
9836
9837 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9838 // TODO: Could handle ordered case, but it produces worse code for
9839 // x86. Maybe handle ordered if fabs is free?
9840
9841 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9842 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9843
9844 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9845 OperandVT.getScalarType().getSimpleVT())) {
9846 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9847
9848 // TODO: Maybe only makes sense if fabs is free. Integer test of
9849 // exponent bits seems better for x86.
9850 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9851 SDValue SmallestNormal = DAG.getConstantFP(
9852 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9853 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9854 IsOrdered ? OrderedOp : UnorderedOp);
9855 }
9856 }
9857
9858 if (FPTestMask == fcNormal) {
9859 // TODO: Handle unordered
9860 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9861 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9862
9863 if (isCondCodeLegalOrCustom(IsFiniteOp,
9864 OperandVT.getScalarType().getSimpleVT()) &&
9865 isCondCodeLegalOrCustom(IsNormalOp,
9866 OperandVT.getScalarType().getSimpleVT()) &&
9867 isFAbsFree(OperandVT)) {
9868 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9869 SDValue Inf =
9870 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9871 SDValue SmallestNormal = DAG.getConstantFP(
9872 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9873
9874 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9875 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9876 SDValue IsNormal =
9877 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
// In the inverted form both compares are inverted too, so they are combined
// with OR instead of AND.
9878 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9879 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9880 }
9881 }
9882 }
9883
9884 // Some checks may be represented as inversion of simpler check, for example
9885 // "inf|normal|subnormal|zero" => !"nan".
9886 bool IsInverted = false;
9887
9888 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9889 Test = InvertedCheck;
9890 IsInverted = true;
9891 }
9892
9893 // In the general case use integer operations.
9894 unsigned BitSize = OperandVT.getScalarSizeInBits();
9895 EVT IntVT = OperandVT.changeElementType(
9896 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9897 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9898
9899 // Various masks.
9900 APInt SignBit = APInt::getSignMask(BitSize);
9901 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9902 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9903 const unsigned ExplicitIntBitInF80 = 63;
9904 APInt ExpMask = Inf;
9905 if (IsF80)
9906 ExpMask.clearBit(ExplicitIntBitInF80);
9907 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9908 APInt QNaNBitMask =
9909 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9910 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9911
9912 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9913 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9914 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9915 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9916 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9917 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9918
// Accumulates partial tests in Res: the first one becomes Res, later ones are
// ORed in. Empty values are ignored.
9919 SDValue Res;
9920 const auto appendResult = [&](SDValue PartialRes) {
9921 if (PartialRes) {
9922 if (Res)
9923 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9924 else
9925 Res = PartialRes;
9926 }
9927 };
9928
// The test below is built on first use and cached in IntBitIsSetV.
9929 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9930 const auto getIntBitIsSet = [&]() -> SDValue {
9931 if (!IntBitIsSetV) {
9932 APInt IntBitMask(BitSize, 0);
9933 IntBitMask.setBit(ExplicitIntBitInF80);
9934 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9935 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9936 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9937 }
9938 return IntBitIsSetV;
9939 };
9940
9941 // Split the value into sign bit and absolute value.
// SignV is true when the integer view of the operand is negative.
9942 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9943 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9944 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9945
9946 // Tests that involve more than one class should be processed first.
9947 SDValue PartialRes;
9948
9949 if (IsF80)
9950 ; // Detect finite numbers of f80 by checking individual classes because
9951 // they have different settings of the explicit integer bit.
9952 else if ((Test & fcFinite) == fcFinite) {
9953 // finite(V) ==> (a << 1) < (inf << 1)
9954 //
9955 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
9956 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
9957
9959 "finite check requires IEEE-like FP");
9960
9961 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
9962 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
9963 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
9964
9965 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
9966 Test &= ~fcFinite;
9967 } else if ((Test & fcFinite) == fcPosFinite) {
9968 // finite(V) && V > 0 ==> V < exp_mask
9969 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9970 Test &= ~fcPosFinite;
9971 } else if ((Test & fcFinite) == fcNegFinite) {
9972 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9973 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9974 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9975 Test &= ~fcNegFinite;
9976 }
9977 appendResult(PartialRes);
9978
9979 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9980 // fcZero | fcSubnormal => test all exponent bits are 0
9981 // TODO: Handle sign bit specific cases
9982 if (PartialCheck == (fcZero | fcSubnormal)) {
9983 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9984 SDValue ExpIsZero =
9985 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9986 appendResult(ExpIsZero);
9987 Test &= ~PartialCheck & fcAllFlags;
9988 }
9989 }
9990
9991 // Check for individual classes.
9992
9993 if (unsigned PartialCheck = Test & fcZero) {
9994 if (PartialCheck == fcPosZero)
9995 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9996 else if (PartialCheck == fcZero)
9997 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9998 else // ISD::fcNegZero
9999 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
10000 appendResult(PartialRes);
10001 }
10002
10003 if (unsigned PartialCheck = Test & fcSubnormal) {
10004 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
10005 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
10006 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
10007 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
10008 SDValue VMinusOneV =
10009 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
10010 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
10011 if (PartialCheck == fcNegSubnormal)
10012 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10013 appendResult(PartialRes);
10014 }
10015
10016 if (unsigned PartialCheck = Test & fcInf) {
10017 if (PartialCheck == fcPosInf)
10018 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
10019 else if (PartialCheck == fcInf)
10020 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
10021 else { // ISD::fcNegInf
10022 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10023 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10024 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10025 }
10026 appendResult(PartialRes);
10027 }
10028
10029 if (unsigned PartialCheck = Test & fcNan) {
10030 APInt InfWithQnanBit = Inf | QNaNBitMask;
10031 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10032 if (PartialCheck == fcNan) {
10033 // isnan(V) ==> abs(V) > int(inf)
10034 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10035 if (IsF80) {
10036 // Recognize unsupported values as NaNs for compatibility with glibc.
10037 // In them (exp(V)==0) == int_bit.
10038 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10039 SDValue ExpIsZero =
10040 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10041 SDValue IsPseudo =
10042 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10043 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10044 }
10045 } else if (PartialCheck == fcQNan) {
10046 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10047 PartialRes =
10048 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10049 } else { // ISD::fcSNan
10050 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10051 // abs(V) < (unsigned(Inf) | quiet_bit)
10052 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10053 SDValue IsNotQnan =
10054 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10055 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10056 }
10057 appendResult(PartialRes);
10058 }
10059
10060 if (unsigned PartialCheck = Test & fcNormal) {
10061 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10062 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10063 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10064 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10065 APInt ExpLimit = ExpMask - ExpLSB;
10066 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10067 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10068 if (PartialCheck == fcNegNormal)
10069 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10070 else if (PartialCheck == fcPosNormal) {
10071 SDValue PosSignV =
10072 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10073 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10074 }
10075 if (IsF80)
10076 PartialRes =
10077 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10078 appendResult(PartialRes);
10079 }
10080
// No partial test was produced: the result is the constant IsInverted.
10081 if (!Res)
10082 return DAG.getConstant(IsInverted, DL, ResultVT);
// Undo the earlier mask inversion by flipping every bit of the result.
10083 if (IsInverted)
10084 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10085 return Res;
10086}
10087
10088// Only expand vector types if we have the appropriate vector bit operations.
// Returns whether the operations queried below are legal or custom for the
// vector type VT. The MUL query is skipped for 8-bit elements.
// NOTE(review): several of the queried operations are missing from this
// listing; only the ADD and MUL queries are visible.
10089static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10090 assert(VT.isVector() && "Expected vector type");
10091 unsigned Len = VT.getScalarSizeInBits();
10092 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
10095 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10097}
10098
// Expands a population count of operand 0 of Node with the parallel
// bit-counting sequence referenced below. Returns an empty SDValue when the
// element width is above 128 bits or not a multiple of 8, or when VT is a
// vector and canExpandVectorCTPOP rejects it.
10100 SDLoc dl(Node);
10101 EVT VT = Node->getValueType(0);
10102 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10103 SDValue Op = Node->getOperand(0);
10104 unsigned Len = VT.getScalarSizeInBits();
10105 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10106
10107 // TODO: Add support for irregular type lengths.
10108 if (!(Len <= 128 && Len % 8 == 0))
10109 return SDValue();
10110
10111 // Only expand vector types if we have the appropriate vector bit operations.
10112 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10113 return SDValue();
10114
10115 // This is the "best" algorithm from
10116 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Each mask repeats one byte pattern across the whole element width.
10117 SDValue Mask55 =
10118 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10119 SDValue Mask33 =
10120 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10121 SDValue Mask0F =
10122 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10123
10124 // v = v - ((v >> 1) & 0x55555555...)
10125 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10126 DAG.getNode(ISD::AND, dl, VT,
10127 DAG.getNode(ISD::SRL, dl, VT, Op,
10128 DAG.getConstant(1, dl, ShVT)),
10129 Mask55));
10130 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10131 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10132 DAG.getNode(ISD::AND, dl, VT,
10133 DAG.getNode(ISD::SRL, dl, VT, Op,
10134 DAG.getConstant(2, dl, ShVT)),
10135 Mask33));
10136 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10137 Op = DAG.getNode(ISD::AND, dl, VT,
10138 DAG.getNode(ISD::ADD, dl, VT, Op,
10139 DAG.getNode(ISD::SRL, dl, VT, Op,
10140 DAG.getConstant(4, dl, ShVT))),
10141 Mask0F);
10142
// For elements of at most 8 bits no further summing step is emitted.
10143 if (Len <= 8)
10144 return Op;
10145
10146 // Avoid the multiply if we only have 2 bytes to add.
10147 // TODO: Only doing this for scalars because vectors weren't as obviously
10148 // improved.
10149 if (Len == 16 && !VT.isVector()) {
10150 // v = (v + (v >> 8)) & 0x00FF;
10151 return DAG.getNode(ISD::AND, dl, VT,
10152 DAG.getNode(ISD::ADD, dl, VT, Op,
10153 DAG.getNode(ISD::SRL, dl, VT, Op,
10154 DAG.getConstant(8, dl, ShVT))),
10155 DAG.getConstant(0xFF, dl, VT));
10156 }
10157
10158 // v = (v * 0x01010101...) >> (Len - 8)
// NOTE(review): the condition that chooses between the multiply form and the
// shift-and-add loop is not visible in this listing.
10159 SDValue V;
10162 SDValue Mask01 =
10163 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10164 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10165 } else {
// Shift-and-add alternative to the multiply: V += V << 8, V += V << 16, ...
10166 V = Op;
10167 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10168 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10169 V = DAG.getNode(ISD::ADD, dl, VT, V,
10170 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10171 }
10172 }
10173 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10174}
10175
// Vector-predicated population count: the same bit-counting sequence as
// expandCTPOP, built from VP_* nodes that all carry Mask (operand 1) and VL
// (operand 2) of Node. Returns an empty SDValue when the element width is
// above 128 bits or not a multiple of 8.
10177 SDLoc dl(Node);
10178 EVT VT = Node->getValueType(0);
10179 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10180 SDValue Op = Node->getOperand(0);
10181 SDValue Mask = Node->getOperand(1);
10182 SDValue VL = Node->getOperand(2);
10183 unsigned Len = VT.getScalarSizeInBits();
10184 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10185
10186 // TODO: Add support for irregular type lengths.
10187 if (!(Len <= 128 && Len % 8 == 0))
10188 return SDValue();
10189
10190 // This is same algorithm of expandCTPOP from
10191 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10192 SDValue Mask55 =
10193 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10194 SDValue Mask33 =
10195 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10196 SDValue Mask0F =
10197 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10198
10199 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10200
10201 // v = v - ((v >> 1) & 0x55555555...)
10202 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10203 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10204 DAG.getConstant(1, dl, ShVT), Mask, VL),
10205 Mask55, Mask, VL);
10206 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10207
10208 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10209 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10210 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10211 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10212 DAG.getConstant(2, dl, ShVT), Mask, VL),
10213 Mask33, Mask, VL);
10214 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10215
10216 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the Tmp4 assignment below ends with ',' rather than ';', so
// it is joined to the Tmp5 assignment by the comma operator. Evaluation order
// is unchanged, but this looks like a typo for ';'.
10217 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10218 Mask, VL),
10219 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10220 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10221
// For elements of at most 8 bits no further summing step is emitted.
10222 if (Len <= 8)
10223 return Op;
10224
10225 // v = (v * 0x01010101...) >> (Len - 8)
// NOTE(review): the first line of the condition guarding the VP_MUL form is
// not visible in this listing.
10226 SDValue V;
10228 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10229 SDValue Mask01 =
10230 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10231 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10232 } else {
// Shift-and-add alternative to the multiply: V += V << 8, V += V << 16, ...
10233 V = Op;
10234 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10235 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10236 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10237 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10238 Mask, VL);
10239 }
10240 }
10241 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10242 Mask, VL);
10243}
10244
// Expands a count-leading-zeros node (CTLZ or CTLZ_ZERO_POISON). Tries, in
// order: the plain CTLZ node, CTLZ_ZERO_POISON plus a select for a zero
// input, and finally an OR/shift smear followed by CTPOP of the complement.
// NOTE(review): several condition lines of this function are missing from
// this listing, so the exact legality checks are not described.
10246 SDLoc dl(Node);
10247 EVT VT = Node->getValueType(0);
10248 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10249 SDValue Op = Node->getOperand(0);
10250 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10251
10252 // If the non-ZERO_POISON version is supported we can use that instead.
10253 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10255 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10256
10257 // If the ZERO_POISON version is supported use that and handle the zero case.
// A zero input selects the element bit width instead of the CTLZ_ZERO_POISON
// result.
10259 EVT SetCCVT =
10260 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10261 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10262 SDValue Zero = DAG.getConstant(0, dl, VT);
10263 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10264 return DAG.getSelect(dl, VT, SrcIsZero,
10265 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10266 }
10267
10268 // Only expand vector types if we have the appropriate vector bit operations.
10269 // This includes the operations needed to expand CTPOP if it isn't supported.
10270 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10272 !canExpandVectorCTPOP(*this, VT)) ||
10275 return SDValue();
10276
10277 // for now, we do this:
10278 // x = x | (x >> 1);
10279 // x = x | (x >> 2);
10280 // ...
10281 // x = x | (x >>16);
10282 // x = x | (x >>32); // for 64-bit input
10283 // return popcount(~x);
10284 //
10285 // Ref: "Hacker's Delight" by Henry Warren
// The shift amounts are the powers of two below NumBitsPerElt.
10286 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10287 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10288 Op = DAG.getNode(ISD::OR, dl, VT, Op,
10289 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10290 }
10291 Op = DAG.getNOT(dl, Op, VT);
10292 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10293}
10294
// Vector-predicated count-leading-zeros: smears the highest set bit into all
// lower positions with VP_OR / VP_SRL, complements the value with VP_XOR
// against all-ones, and counts the remaining set bits with VP_CTPOP. Every
// node carries Mask (operand 1) and VL (operand 2) of Node.
10296 SDLoc dl(Node);
10297 EVT VT = Node->getValueType(0);
10298 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10299 SDValue Op = Node->getOperand(0);
10300 SDValue Mask = Node->getOperand(1);
10301 SDValue VL = Node->getOperand(2);
10302 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10303
10304 // do this:
10305 // x = x | (x >> 1);
10306 // x = x | (x >> 2);
10307 // ...
10308 // x = x | (x >>16);
10309 // x = x | (x >>32); // for 64-bit input
10310 // return popcount(~x);
// The shift amounts are the powers of two below NumBitsPerElt.
10311 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10312 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10313 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10314 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10315 VL);
10316 }
// ~x is expressed as x ^ all-ones.
10317 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10318 Mask, VL);
10319 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10320}
10321
// Expands a count-leading-sign-bits node into shifts, XOR, OR and a
// CTLZ_ZERO_POISON node, following the formula below.
10323 SDLoc dl(Node);
10324 EVT VT = Node->getValueType(0);
// The operand is frozen before it is used twice (SRA and XOR) below,
// presumably so that both uses observe the same value.
10325 SDValue Op = DAG.getFreeze(Node->getOperand(0));
10326 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10327
10328 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10329 // This transforms the sign bits into leading zeros that can be counted.
10330 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
10331 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10332 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10333 SDValue Shl =
10334 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
// ORing in bit 0 guarantees a non-zero input for CTLZ_ZERO_POISON.
10335 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10336 return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10337}
10338
10340 const SDLoc &DL, EVT VT, SDValue Op,
10341 unsigned BitWidth) const {
// Lowers a count-trailing-zeros of Op to a De Bruijn multiplication followed
// by a byte load from a constant-pool table. Only bit widths 32 and 64 are
// supported; otherwise an empty SDValue is returned.
// NOTE(review): a further bail-out condition, the PtrInfo initializer and the
// declaration of Table are missing from this listing.
10342 if (BitWidth != 32 && BitWidth != 64)
10343 return SDValue();
10344
10345 const DataLayout &TD = DAG.getDataLayout();
10347 return SDValue();
10348
10349 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10350 : APInt(64, 0x0218A392CD3D5DBFULL);
10351 MachinePointerInfo PtrInfo =
10353 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// Table index = ((Op & -Op) * DeBruijn) >> ShiftAmt.
10354 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10355 SDValue Lookup = DAG.getNode(
10356 ISD::SRL, DL, VT,
10357 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10358 DAG.getConstant(DeBruijn, DL, VT)),
10359 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10361
// Fill the table so that the index computed for a value with only bit i set
// maps back to i.
10363 for (unsigned i = 0; i < BitWidth; i++) {
10364 APInt Shl = DeBruijn.shl(i);
10365 APInt Lshr = Shl.lshr(ShiftAmt);
10366 Table[Lshr.getZExtValue()] = i;
10367 }
10368
10369 // Create a ConstantArray in Constant Pool
10370 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10371 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10372 TD.getPrefTypeAlign(CA->getType()));
// The table entry is loaded as an i8 and zero-extended to VT.
10373 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10374 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10375 PtrInfo, MVT::i8);
10376 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10377 return ExtLoad;
10378
// Otherwise a zero input selects BitWidth instead of the table entry.
10379 EVT SetCCVT =
10380 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10381 SDValue Zero = DAG.getConstant(0, DL, VT);
10382 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10383 return DAG.getSelect(DL, VT, SrcIsZero,
10384 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10385}
10386
// Expands a count-trailing-zeros node (CTTZ or CTTZ_ZERO_POISON). Tries, in
// order: the plain CTTZ node, CTTZ_ZERO_POISON plus a select for a zero
// input, the CTTZTableLookup lowering, and finally ~x & (x - 1) counted with
// either CTLZ or CTPOP.
// NOTE(review): several condition lines of this function are missing from
// this listing, so the exact legality checks are not described.
10388 SDLoc dl(Node);
10389 EVT VT = Node->getValueType(0);
10390 SDValue Op = Node->getOperand(0);
10391 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10392
10393 // If the non-ZERO_POISON version is supported we can use that instead.
10394 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10396 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10397
10398 // If the ZERO_POISON version is supported use that and handle the zero case.
// A zero input selects the element bit width instead of the CTTZ_ZERO_POISON
// result.
10400 EVT SetCCVT =
10401 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10402 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10403 SDValue Zero = DAG.getConstant(0, dl, VT);
10404 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10405 return DAG.getSelect(dl, VT, SrcIsZero,
10406 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10407 }
10408
10409 // Only expand vector types if we have the appropriate vector bit operations.
10410 // This includes the operations needed to expand CTPOP if it isn't supported.
10411 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10414 !canExpandVectorCTPOP(*this, VT)) ||
10418 return SDValue();
10419
10420 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10421 // to be expanded or converted to a libcall.
10424 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10425 return V;
10426
10427 // for now, we use: { return popcount(~x & (x - 1)); }
10428 // unless the target has ctlz but not ctpop, in which case we use:
10429 // { return 32 - nlz(~x & (x-1)); }
10430 // Ref: "Hacker's Delight" by Henry Warren
10431 SDValue Tmp = DAG.getNode(
10432 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10433 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10434
10435 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
// The CTLZ form computes NumBitsPerElt - ctlz(Tmp).
10437 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10438 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10439 }
10440
10441 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10442}
10443
// Vector-predicated count-trailing-zeros, built as VP_CTPOP(~x & (x - 1)).
// Every node carries Mask (operand 1) and VL (operand 2) of Node.
10445 SDValue Op = Node->getOperand(0);
10446 SDValue Mask = Node->getOperand(1);
10447 SDValue VL = Node->getOperand(2);
10448 SDLoc dl(Node);
10449 EVT VT = Node->getValueType(0);
10450
10451 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
// ~x is expressed as x ^ all-ones.
10452 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10453 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10454 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10455 DAG.getConstant(1, dl, VT), Mask, VL);
10456 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10457 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10458}
10459
10461 SelectionDAG &DAG) const {
// Expands the node N (operands: source vector, mask, EVL) into a select
// between a step vector and a splat of EVL, followed by an unsigned-minimum
// reduction, as sketched in the pseudo code below.
10462 // %cond = to_bool_vec %source
10463 // %splat = splat /*val=*/VL
10464 // %tz = step_vector
10465 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10466 // %r = vp.reduce.umin %v
10467 SDLoc DL(N);
10468 SDValue Source = N->getOperand(0);
10469 SDValue Mask = N->getOperand(1);
10470 SDValue EVL = N->getOperand(2);
10471 EVT SrcVT = Source.getValueType();
10472 EVT ResVT = N->getValueType(0);
// Vector of ResVT elements with the same element count as the source.
10473 EVT ResVecVT =
10474 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10475
10476 // Convert to boolean vector.
// Non-i1 sources are compared against zero with SETNE.
10477 if (SrcVT.getScalarType() != MVT::i1) {
10478 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10479 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10480 SrcVT.getVectorElementCount());
10481 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10482 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10483 }
10484
// EVL is zero-extended or truncated to the result type; it is used both as
// the splat value and as the first operand of the reduction.
10485 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10486 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10487 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10488 SDValue Select =
10489 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10490 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10491}
10492
10493/// Returns a type-legalized version of \p Mask as the first item in the
10494/// pair. The second item contains a type-legalized step vector that's
10495/// guaranteed to fit the number of elements in \p Mask.
10496/// If the stepvector would require splitting, returns an empty SDValue
10497/// as the second item to signal that the operation should be split instead.
10498static std::pair<SDValue, SDValue>
10500 SelectionDAG &DAG) {
10501 EVT MaskVT = Mask.getValueType();
10502 EVT BoolVT = MaskVT.getScalarType();
10503
10504 // Find a suitable type for a stepvector.
10505 // If zero is poison, we can assume the upper limit of the result is VF-1.
10506 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10507 if (MaskVT.isScalableVector())
10508 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10509 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10510 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10511 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10512 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10513 // If the step vector element type is smaller than the mask element type,
10514 // use the mask type directly to avoid widening issues.
10515 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10516 EVT StepVT = MVT::getIntegerVT(EltWidth);
10517 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10518
10519 // If promotion or widening is required to make the type legal, do it here.
10520 // Promotion of integers within LegalizeVectorOps is looking for types of
10521 // the same size but with a smaller number of larger elements, not the usual
10522 // larger size with the same number of larger elements.
10524 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10525 SDValue StepVec;
10526 if (TypeAction == TargetLowering::TypePromoteInteger) {
// Promotion: build the step vector directly in the promoted type.
10527 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10528 StepVec = DAG.getStepVector(DL, StepVecVT);
10529 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10530 // For widening, the element count changes. Create a step vector with only
10531 // the original elements valid and poison for padding. Also widen the mask.
10532 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10533 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10534
10535 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10536 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10537 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10538 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10539
10540 // Widen mask: pad with zeros.
10541 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10542 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10543 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10544 } else if (TypeAction == TargetLowering::TypeSplitVector) {
10545 // The stepvector type would require splitting. Signal to the caller
10546 // that the operation should be split instead of expanded.
10547 return {Mask, SDValue()};
10548 } else {
// Any other type action: use the step vector type as computed above.
10549 StepVec = DAG.getStepVector(DL, StepVecVT);
10550 }
10551
10552 return {Mask, StepVec};
10553}
10554
10556 SelectionDAG &DAG) const {
// Expands VECTOR_FIND_LAST_ACTIVE on the mask in operand 0 of N. When a legal
// step vector is available, inactive lanes are zeroed and the largest
// remaining step value is taken with VECREDUCE_UMAX. Otherwise the mask is
// split in two halves and the node is re-emitted on each half.
10557 SDLoc DL(N);
10558 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10559 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10560
10561 // If StepVec is empty, the stepvector would require splitting.
10562 // Split the operation instead and let it be recursively legalized.
10563 if (!StepVec) {
10564 EVT MaskVT = N->getOperand(0).getValueType();
10565 EVT ResVT = N->getValueType(0);
10566
10567 // Split the mask
10568 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10569 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10570
10571 // Create split VECTOR_FIND_LAST_ACTIVE operations
10572 SDValue LoResult =
10573 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10574 SDValue HiResult =
10575 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10576
10577 // Check if any lane is active in the high mask.
// NOTE(review): the line that declares Cond (used in the select below) is
// missing from this listing; only its trailing arguments are visible.
10578 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
10580 AnyHiActive, DL,
10581 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10582 MVT::i1);
10583
10584 // Adjust HiResult by adding the number of elements in Lo
10585 SDValue LoNumElts =
10586 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10587 SDValue AdjustedHiResult =
10588 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10589
10590 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10591 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10592 LoResult);
10593 }
10594
10595 EVT StepVecVT = StepVec.getValueType();
10596 EVT StepVT = StepVec.getValueType().getVectorElementType();
10597
10598 // Zero out lanes with inactive elements, then find the highest remaining
10599 // value from the stepvector.
10600 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10601 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10602 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
// The reduction result has the step element type; convert it to N's result
// type.
10603 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10604}
10605
// Expands a loop-dependence mask node into a GET_ACTIVE_LANE_MASK node.
// Operands of N as read below: 0 = source value, 1 = sink value (both are
// pointers per the comments below), 2 = element size in bytes, 3 = constant
// lane offset. LOOP_DEPENDENCE_RAW_MASK selects the read-after-write form;
// any other opcode takes the WAR path described in the comments.
// NOTE(review): listing line 10606 (the first line of this signature, which
// carries the return type and function name) is absent from this extract.
10607 SelectionDAG &DAG) const {
10608 SDLoc DL(N);
10609 EVT VT = N->getValueType(0);
10610 SDValue SourceValue = N->getOperand(0);
10611 SDValue SinkValue = N->getOperand(1);
10612 SDValue EltSizeInBytes = N->getOperand(2);
10613
10614 // Note: The lane offset is scalable if the mask is scalable.
10615 ElementCount LaneOffsetEC =
10616 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
10617
10618 EVT AddrVT = SourceValue->getValueType(0);
10619 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
10620
10621 // Take the difference between the pointers and divide it by the element
10622 // size, to see how many lanes separate them.
// For the read-after-write form the sign of the distance is discarded (ABS)
// before the signed division.
10623 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
10624 if (IsReadAfterWrite)
10625 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
10626 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
10627
10628 // The pointers do not alias if:
10629 // * Diff <= 0 (WAR_MASK)
10630 // * Diff == 0 (RAW_MASK)
10631 EVT CmpVT =
10632 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
10633 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
10634 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
10635 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
10636
10637 // The pointers do not alias if:
10638 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
10639 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
// MaskN is the upper bound handed to GET_ACTIVE_LANE_MASK: Diff in the
// general case, or the value from the missing expression when Cmp is true.
10640 SDValue MaskN = DAG.getSelect(
10641 DL, AddrVT, Cmp,
// NOTE(review): listing line 10642 is absent from this extract. It opens the
// expression (closed by `AddrVT),` below) that supplies the value selected
// when Cmp is true, i.e. when the pointers do not alias.
10643 AddrVT),
10644 Diff);
10645
10646 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
10647}
10648
// Expands an integer absolute-value node on operand 0 of N, or the negated
// absolute value when IsNegative is true. Returns an empty SDValue when VT is
// a vector type and the vector-operation check near the end fails.
// NOTE(review): listing line 10649 (the first line of this signature, which
// carries the return type and function name) is absent from this extract.
10650 bool IsNegative) const {
10651 SDLoc dl(N);
10652 EVT VT = N->getValueType(0);
10653 SDValue Op = N->getOperand(0);
10654
10655 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
10656 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
// NOTE(review): listing line 10657 (the rest of this condition and the
// opening brace of the block closed at 10662) is absent from this extract.
10658 SDValue AbsVal = DAG.getNode(ISD::ABS, dl, VT, Op);
10659 if (IsNegative)
10660 return DAG.getNegative(AbsVal, dl, VT);
10661 return AbsVal;
10662 }
10663
// In each min/max form below Op is frozen before being used twice,
// presumably so that both uses observe the same value.
10664 // abs(x) -> smax(x,sub(0,x))
10665 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): listing line 10666 (rest of the condition and opening brace)
// is absent from this extract.
10667 SDValue Zero = DAG.getConstant(0, dl, VT);
10668 Op = DAG.getFreeze(Op);
10669 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10670 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10671 }
10672
10673 // abs(x) -> umin(x,sub(0,x))
10674 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): listing line 10675 (rest of the condition and opening brace)
// is absent from this extract.
10676 SDValue Zero = DAG.getConstant(0, dl, VT);
10677 Op = DAG.getFreeze(Op);
10678 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10679 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10680 }
10681
10682 // 0 - abs(x) -> smin(x, sub(0,x))
10683 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): listing line 10684 (rest of the condition and opening brace)
// is absent from this extract.
10685 SDValue Zero = DAG.getConstant(0, dl, VT);
10686 Op = DAG.getFreeze(Op);
10687 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10688 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10689 }
10690
10691 // Only expand vector types if we have the appropriate vector operations.
10692 if (VT.isVector() &&
// NOTE(review): listing lines 10693 and 10696 (the first and last clauses of
// this condition) are absent from this extract.
10694 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10695 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
10697 return SDValue();
10698
// Generic fallback: Shift is the sign of Op replicated into every bit
// (arithmetic shift right by width - 1), so it is 0 for non-negative values
// and all-ones for negative values.
10699 Op = DAG.getFreeze(Op);
10700 SDValue Shift = DAG.getNode(
10701 ISD::SRA, dl, VT, Op,
10702 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10703 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10704
10705 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10706 if (!IsNegative)
10707 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10708
10709 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10710 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10711}
10712
// Expands an absolute-difference node on operands 0 and 1 of N. ISD::ABDS
// selects the signed form; any other opcode is handled as the unsigned form
// (abdu in the comments below). Strategies are tried in order and the first
// applicable one returns.
// NOTE(review): listing line 10713 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
10714 SDLoc dl(N);
10715 EVT VT = N->getValueType(0);
10716 SDValue LHS = N->getOperand(0);
10717 SDValue RHS = N->getOperand(1);
10718 bool IsSigned = N->getOpcode() == ISD::ABDS;
10719
10720 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10721 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10722 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10723 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10724 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
10725 LHS = DAG.getFreeze(LHS);
10726 RHS = DAG.getFreeze(RHS);
10727 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10728 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10729 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10730 }
10731
10732 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10733 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10734 LHS = DAG.getFreeze(LHS);
10735 RHS = DAG.getFreeze(RHS);
10736 return DAG.getNode(ISD::OR, dl, VT,
10737 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10738 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10739 }
10740
10741 // If the subtract doesn't overflow then just use abs(sub())
// Both operand orders are tried; the operands are not frozen on these paths
// because each is used only once.
10742 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10743
10744 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10745 return DAG.getNode(ISD::ABS, dl, VT,
10746 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10747
10748 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10749 return DAG.getNode(ISD::ABS, dl, VT,
10750 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10751
10752 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): listing line 10753 is absent from this extract; `CC`, used
// in the setcc below, is presumably declared there (a greater-than condition
// code per the sgt/ugt comments that follow) - confirm against the full file.
10754 LHS = DAG.getFreeze(LHS);
10755 RHS = DAG.getFreeze(RHS);
10756 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10757
10758 // Branchless expansion iff cmp result is allbits:
10759 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10760 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10761 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10762 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10763 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10764 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10765 }
10766
10767 // Similar to the branchless expansion, if we don't prefer selects, use the
10768 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10769 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10770 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10771 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): listing line 10772 (the rest of this condition and the
// opening brace of the block closed at 10778) is absent from this extract.
10773 SDValue USubO =
10774 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
// This inner Cmp shadows the setcc result declared above; here it is the
// sign-extended overflow flag of the USUBO.
10775 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10776 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10777 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10778 }
10779
10780 // FIXME: Should really try to split the vector in case it's legal on a
10781 // subvector.
// NOTE(review): listing line 10782 is absent from this extract; it presumably
// holds the condition guarding the unroll below, since code follows the
// return - confirm against the full file.
10783 return DAG.UnrollVectorOp(N);
10784
10785 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10786 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10787 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10788 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10789}
10790
// Expands an averaging node (AVGFLOORS, AVGFLOORU, AVGCEILS or AVGCEILU) on
// operands 0 and 1 of N into adds, shifts and bitwise operations.
// NOTE(review): listing line 10791 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
10792 SDLoc dl(N);
10793 EVT VT = N->getValueType(0);
10794 SDValue LHS = N->getOperand(0);
10795 SDValue RHS = N->getOperand(1);
10796
// Opcode-derived choices: SumOpc and SignOpc are used only by the final
// generic expansion; ShiftOpc is also used by the add+shift fast path, and
// ExtOpc by the scalar widening path.
10797 unsigned Opc = N->getOpcode();
10798 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10799 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
10800 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10801 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10802 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10803 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// NOTE(review): listing line 10804 (the start of the assertion completed by
// the two lines below) is absent from this extract.
10805 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10806 "Unknown AVG node");
10807
10808 // If the operands are already extended, we can add+shift.
// "Extended" here means each operand has a spare top bit: at least two sign
// bits for the signed forms, or a known-zero leading bit for the unsigned
// forms.
10809 bool IsExt =
10810 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10811 DAG.ComputeNumSignBits(RHS) >= 2) ||
10812 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10813 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10814 if (IsExt) {
10815 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
// The ceiling forms add 1 before the shift.
10816 if (!IsFloor)
10817 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10818 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10819 DAG.getShiftAmountConstant(1, VT, dl));
10820 }
10821
10822 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10823 if (VT.isScalarInteger()) {
10824 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10825 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10826 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10827 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10828 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10829 if (!IsFloor)
10830 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10831 DAG.getConstant(1, dl, ExtVT));
10832 // Just use SRL as we will be truncating away the extended sign bits.
10833 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10834 DAG.getShiftAmountConstant(1, ExtVT, dl));
10835 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10836 }
10837 }
10838
10839 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10840 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): listing lines 10841-10842 (the rest of this condition and the
// opening brace of the block closed at 10859) are absent from this extract.
10843 SDValue UAddWithOverflow =
10844 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10845
10846 SDValue Sum = UAddWithOverflow.getValue(0);
10847 SDValue Overflow = UAddWithOverflow.getValue(1);
10848
10849 // Right shift the sum by 1
10850 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10851 DAG.getShiftAmountConstant(1, VT, dl));
10852
// Despite its name this is an ANY_EXTEND: only bit 0 of the extended value
// survives the shift left by (width - 1) below, so the upper bits are
// irrelevant.
10853 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10854 SDValue OverflowShl = DAG.getNode(
10855 ISD::SHL, dl, VT, ZeroExtOverflow,
10856 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10857
10858 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10859 }
10860
10861 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10862 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10863 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10864 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Operands are frozen first because each is used twice below.
10865 LHS = DAG.getFreeze(LHS);
10866 RHS = DAG.getFreeze(RHS);
10867 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10868 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10869 SDValue Shift =
10870 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10871 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10872}
10873
// Expands a byte swap of operand 0 of N for scalar element types i16, i32 and
// i64. Returns an empty SDValue when VT is not a simple type or has any other
// element type.
// NOTE(review): listing line 10874 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
10875 SDLoc dl(N);
10876 EVT VT = N->getValueType(0);
10877 SDValue Op = N->getOperand(0);
10878
10879 if (!VT.isSimple())
10880 return SDValue();
10881
// Shift amounts are built in SHVT; masks are built in VT.
10882 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10883 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10884 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10885 default:
10886 return SDValue();
10887 case MVT::i16:
10888 // Use a rotate by 8. This can be further expanded if necessary.
10889 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10890 case MVT::i32:
10891 // This is meant for ARM specifically, which has ROTR but no ROTL.
10892 // t = x ^ rotr(x, 16)
10893 // t = bic(t, 0x00ff0000)
10894 // t = lshr(t, 8)
10895 // x = t ^ rotr(x, 8)
// NOTE(review): listing line 10896 is absent from this extract; it presumably
// holds the `if (...) {` that guards this rotate-based sequence, matching the
// closing brace at 10906 - confirm against the full file.
10897 SDValue Rotr16 =
10898 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
10899 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
// AND with 0xFF00FFFF clears bits 16-23, i.e. the bic(t, 0x00ff0000) step.
10900 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
10901 DAG.getConstant(0xFF00FFFF, dl, VT));
10902 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
10903 SDValue Rotr8 =
10904 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10905 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
10906 }
// Shift/mask form: Tmp4..Tmp1 each hold one source byte moved to its mirrored
// position (byte 0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0); they are then ORed together.
10907 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10908 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10909 DAG.getConstant(0xFF00, dl, VT));
10910 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10911 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10912 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10913 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10914 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10915 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10916 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10917 case MVT::i64:
// Same scheme for eight bytes: Tmp8..Tmp5 move bytes 0-3 up to positions 7-4
// (mask, then shift left); Tmp4..Tmp1 move bytes 4-7 down to positions 3-0
// (shift right, then mask). The results are combined with a tree of ORs.
10918 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10919 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10920 DAG.getConstant(255ULL<<8, dl, VT));
10921 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10922 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10923 DAG.getConstant(255ULL<<16, dl, VT));
10924 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10925 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10926 DAG.getConstant(255ULL<<24, dl, VT));
10927 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10928 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10929 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10930 DAG.getConstant(255ULL<<24, dl, VT));
10931 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10932 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10933 DAG.getConstant(255ULL<<16, dl, VT));
10934 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10935 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10936 DAG.getConstant(255ULL<<8, dl, VT));
10937 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10938 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10939 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10940 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10941 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10942 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10943 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10944 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10945 }
10946}
10947
// Vector-predicated counterpart of the byte-swap expansion: operand 0 of N is
// the value, operand 1 the mask and operand 2 the explicit vector length
// (EVL). Every node created below is a VP_* node carrying the same Mask and
// EVL. Returns an empty SDValue when VT is not a simple type or its element
// type is not i16, i32 or i64.
// NOTE(review): listing line 10948 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
10949 SDLoc dl(N);
10950 EVT VT = N->getValueType(0);
10951 SDValue Op = N->getOperand(0);
10952 SDValue Mask = N->getOperand(1);
10953 SDValue EVL = N->getOperand(2);
10954
10955 if (!VT.isSimple())
10956 return SDValue();
10957
10958 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10959 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10960 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10961 default:
10962 return SDValue();
10963 case MVT::i16:
// Two bytes: (Op << 8) | (Op >> 8).
10964 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10965 Mask, EVL);
10966 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10967 Mask, EVL);
10968 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10969 case MVT::i32:
// Four bytes: Tmp4..Tmp1 each hold one source byte moved to its mirrored
// position (byte 0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0) and are then ORed together.
10970 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10971 Mask, EVL);
10972 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10973 Mask, EVL);
10974 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10975 Mask, EVL);
10976 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10977 Mask, EVL);
10978 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10979 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10980 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10981 Mask, EVL);
10982 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10983 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10984 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10985 case MVT::i64:
// Eight bytes: Tmp8..Tmp5 move bytes 0-3 up to positions 7-4 (mask, then
// shift left); Tmp4..Tmp1 move bytes 4-7 down to positions 3-0 (shift right,
// then mask). The results are combined with a tree of VP_ORs.
10986 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10987 Mask, EVL);
10988 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10989 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10990 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10991 Mask, EVL);
10992 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10993 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10994 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10995 Mask, EVL);
10996 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10997 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10998 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10999 Mask, EVL);
11000 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11001 Mask, EVL);
11002 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
11003 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
11004 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
11005 Mask, EVL);
11006 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
11007 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
11008 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
11009 Mask, EVL);
11010 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11011 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
11012 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
11013 Mask, EVL);
11014 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
11015 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
11016 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
11017 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
11018 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
11019 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11020 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
11021 }
11022}
11023
// Expands a bit reversal of operand 0 of N. Element widths that are a power
// of two and at least 8 use BSWAP followed by three mask-and-swap rounds;
// every other width falls back to moving one bit at a time.
// NOTE(review): listing line 11024 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
11025 SDLoc dl(N);
11026 EVT VT = N->getValueType(0);
11027 SDValue Op = N->getOperand(0);
11028 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11029 unsigned Sz = VT.getScalarSizeInBits();
11030
11031 SDValue Tmp, Tmp2, Tmp3;
11032
11033 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11034 // and finally the i1 pairs.
11035 // TODO: We can easily support i4/i2 legal types if any target ever does.
11036 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11037 // Create the masks - repeating the pattern every byte.
11038 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11039 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11040 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11041
11042 // BSWAP if the type is wider than a single byte.
11043 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11044
11045 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11046 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11047 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11048 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11049 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11050 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11051
11052 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11053 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11054 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11055 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11056 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11057 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11058
11059 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11060 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11061 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11062 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11063 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11064 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11065 return Tmp;
11066 }
11067
// Fallback: for each I, source bit I is shifted to destination bit
// J = Sz - 1 - I, isolated with a one-bit mask, and ORed into the
// accumulator. J wraps around on the final decrement but is not read again
// because the loop exits on I.
11068 Tmp = DAG.getConstant(0, dl, VT);
11069 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11070 if (I < J)
11071 Tmp2 =
11072 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11073 else
11074 Tmp2 =
11075 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11076
11077 APInt Shift = APInt::getOneBitSet(Sz, J);
11078 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11079 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11080 }
11081
11082 return Tmp;
11083}
11084
// Vector-predicated bit-reversal expansion: operand 0 of N is the value,
// operand 1 the mask and operand 2 the explicit vector length (EVL). Only
// element widths that are a power of two and at least 8 are handled; for any
// other width an empty SDValue is returned.
// NOTE(review): listing line 11085 (the whole signature line, including the
// function name and opening brace) is absent from this extract.
11086 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11087
11088 SDLoc dl(N);
11089 EVT VT = N->getValueType(0);
11090 SDValue Op = N->getOperand(0);
11091 SDValue Mask = N->getOperand(1);
11092 SDValue EVL = N->getOperand(2);
11093 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11094 unsigned Sz = VT.getScalarSizeInBits();
11095
11096 SDValue Tmp, Tmp2, Tmp3;
11097
11098 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11099 // and finally the i1 pairs.
11100 // TODO: We can easily support i4/i2 legal types if any target ever does.
11101 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11102 // Create the masks - repeating the pattern every byte.
11103 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11104 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11105 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11106
11107 // BSWAP if the type is wider than a single byte.
11108 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11109
// Each round below passes the same Mask and EVL to every VP_* node it
// creates.
11110 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11111 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11112 Mask, EVL);
11113 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11114 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11115 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11116 Mask, EVL);
11117 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11118 Mask, EVL);
11119 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11120
11121 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11122 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11123 Mask, EVL);
11124 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11125 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11126 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11127 Mask, EVL);
11128 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11129 Mask, EVL);
11130 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11131
11132 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11133 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11134 Mask, EVL);
11135 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11136 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11137 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11138 Mask, EVL);
11139 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11140 Mask, EVL);
11141 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11142 return Tmp;
11143 }
// No bit-at-a-time fallback exists for the predicated form.
11144 return SDValue();
11145}
11146
// Replaces the fixed-length vector load LD with scalar operations and returns
// the pair {loaded vector value, output chain}. Scalable vector memory types
// are rejected with a fatal error.
// NOTE(review): listing line 11148 (the signature line carrying the function
// name and the declaration of LD) is absent from this extract.
11147std::pair<SDValue, SDValue>
11149 SelectionDAG &DAG) const {
11150 SDLoc SL(LD);
11151 SDValue Chain = LD->getChain();
11152 SDValue BasePTR = LD->getBasePtr();
11153 EVT SrcVT = LD->getMemoryVT();
11154 EVT DstVT = LD->getValueType(0);
11155 ISD::LoadExtType ExtType = LD->getExtensionType();
11156
11157 if (SrcVT.isScalableVector())
11158 report_fatal_error("Cannot scalarize scalable vector loads");
11159
11160 unsigned NumElem = SrcVT.getVectorNumElements();
11161
11162 EVT SrcEltVT = SrcVT.getScalarType();
11163 EVT DstEltVT = DstVT.getScalarType();
11164
11165 // A vector must always be stored in memory as-is, i.e. without any padding
11166 // between the elements, since various code depends on it, e.g. in the
11167 // handling of a bitcast of a vector type to int, which may be done with a
11168 // vector store followed by an integer load. A vector that does not have
11169 // elements that are byte-sized must therefore be stored as an integer
11170 // built out of the extracted vector elements.
11171 if (!SrcEltVT.isByteSized()) {
// LoadVT is an integer as wide as the vector's store size; SrcIntVT is an
// integer as wide as the vector's actual bit size.
11172 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11173 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11174
11175 unsigned NumSrcBits = SrcVT.getSizeInBits();
11176 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11177
11178 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11179 SDValue SrcEltBitMask = DAG.getConstant(
11180 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11181
11182 // Load the whole vector and avoid masking off the top bits as it makes
11183 // the codegen worse.
11184 SDValue Load =
11185 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11186 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11187 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11188
// NOTE(review): listing line 11189 is absent from this extract; `Vals`, used
// in this branch, is presumably declared there - confirm against the full
// file.
11190 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Element Idx sits at bit offset ShiftIntoIdx * element width; the index is
// mirrored on big-endian targets.
11191 unsigned ShiftIntoIdx =
11192 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11193 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11194 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11195 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11196 SDValue Elt =
11197 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11198 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11199
// For an extending load, apply the matching extension to each element.
11200 if (ExtType != ISD::NON_EXTLOAD) {
11201 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11202 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11203 }
11204
11205 Vals.push_back(Scalar);
11206 }
11207
11208 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11209 return std::make_pair(Value, Load.getValue(1));
11210 }
11211
// Byte-sized elements: emit one (extending) scalar load per element.
11212 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11213 assert(SrcEltVT.isByteSized());
11214
// NOTE(review): listing line 11215 is absent from this extract; `Vals`, used
// in the loop below, is presumably declared there - confirm against the full
// file.
11216 SmallVector<SDValue, 8> LoadChains;
11217
11218 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Every scalar load takes the original input Chain; their output chains are
// merged by the TokenFactor after the loop.
11219 SDValue ScalarLoad = DAG.getExtLoad(
11220 ExtType, SL, DstEltVT, Chain, BasePTR,
11221 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11222 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11223
// Advance the running pointer to the next element.
11224 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11225
11226 Vals.push_back(ScalarLoad.getValue(0));
11227 LoadChains.push_back(ScalarLoad.getValue(1));
11228 }
11229
11230 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11231 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11232
11233 return std::make_pair(Value, NewChain);
11234}
11235
// Replaces the fixed-length vector store ST with scalar operations and
// returns the resulting chain: a single integer store when the memory element
// type is not byte-sized, otherwise a TokenFactor over one truncating store
// per element. Scalable vector memory types are rejected with a fatal error.
// NOTE(review): listing line 11236 (the signature line carrying the return
// type, the function name and the declaration of ST) is absent from this
// extract.
11237 SelectionDAG &DAG) const {
11238 SDLoc SL(ST);
11239
11240 SDValue Chain = ST->getChain();
11241 SDValue BasePtr = ST->getBasePtr();
11242 SDValue Value = ST->getValue();
11243 EVT StVT = ST->getMemoryVT();
11244
11245 if (StVT.isScalableVector())
11246 report_fatal_error("Cannot scalarize scalable vector stores");
11247
11248 // The type of the data we want to save
11249 EVT RegVT = Value.getValueType();
11250 EVT RegSclVT = RegVT.getScalarType();
11251
11252 // The type of data as saved in memory.
11253 EVT MemSclVT = StVT.getScalarType();
11254
11255 unsigned NumElem = StVT.getVectorNumElements();
11256
11257 // A vector must always be stored in memory as-is, i.e. without any padding
11258 // between the elements, since various code depends on it, e.g. in the
11259 // handling of a bitcast of a vector type to int, which may be done with a
11260 // vector store followed by an integer load. A vector that does not have
11261 // elements that are byte-sized must therefore be stored as an integer
11262 // built out of the extracted vector elements.
11263 if (!MemSclVT.isByteSized()) {
11264 unsigned NumBits = StVT.getSizeInBits();
11265 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11266
// CurrVal accumulates the packed integer, starting from zero.
11267 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11268
11269 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Truncate each element to the memory element type, zero-extend it to the
// packed integer type, and shift it to its bit position (mirrored on
// big-endian targets) before ORing it in.
11270 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11271 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11272 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
11273 unsigned ShiftIntoIdx =
11274 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11275 SDValue ShiftAmount =
11276 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11277 SDValue ShiftedElt =
11278 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11279 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11280 }
11281
11282 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11283 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11284 ST->getAAInfo());
11285 }
11286
11287 // Store Stride in bytes
11288 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11289 assert(Stride && "Zero stride!");
11290 // Extract each of the elements from the original vector and save them into
11291 // memory individually.
// NOTE(review): listing line 11292 is absent from this extract; `Stores`,
// used below, is presumably declared there - confirm against the full file.
11293 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11294 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11295
// Each element address is computed from the unchanged BasePtr.
11296 SDValue Ptr =
11297 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11298
11299 // This scalar TruncStore may be illegal, but we legalize it later.
11300 SDValue Store = DAG.getTruncStore(
11301 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11302 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11303 ST->getAAInfo());
11304
11305 Stores.push_back(Store);
11306 }
11307
// All element stores share the input Chain; merge their output chains.
11308 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11309}
11310
// Expand the unaligned, unindexed load LD into other memory operations and
// return the pair {replacement value, replacement chain}. Three strategies:
//  - FP/vector value whose same-sized integer type and memory type are both
//    legal: either scalarize the vector load, or load as an integer and
//    bitcast (extending afterwards when the memory type differs from VT).
//  - Other FP/vector values: copy the bytes into a stack temporary using
//    register-sized integer loads/stores, then reload from the stack slot.
//  - Scalar integers: load two half-width pieces and merge them with SHL/OR.
// NOTE(review): the line holding the function name and parameter list
// (listing line 11312) is absent from this extract; presumably this is
// TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
// -- confirm against the real source.
11311std::pair<SDValue, SDValue>
11313 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11314 "unaligned indexed loads not implemented!");
11315 SDValue Chain = LD->getChain();
11316 SDValue Ptr = LD->getBasePtr();
11317 EVT VT = LD->getValueType(0);
11318 EVT LoadedVT = LD->getMemoryVT();
11319 SDLoc dl(LD);
11320 auto &MF = DAG.getMachineFunction();
11321
11322 if (VT.isFloatingPoint() || VT.isVector()) {
// Integer type with the same bit width as the in-memory type.
11323 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11324 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11325 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11326 LoadedVT.isVector()) {
11327 // Scalarize the load and let the individual components be handled.
11328 return scalarizeVectorLoad(LD, DAG);
11329 }
11330
11331 // Expand to a (misaligned) integer load of the same size,
11332 // then bitconvert to floating point or vector.
11333 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11334 LD->getMemOperand());
11335 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
// The original load was extending: widen the bitcast result to VT.
11336 if (LoadedVT != VT)
11337 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11338 ISD::ANY_EXTEND, dl, VT, Result);
11339
11340 return std::make_pair(Result, newLoad.getValue(1));
11341 }
11342
11343 // Copy the value to a (aligned) stack slot using (unaligned) integer
11344 // loads and stores, then do a (aligned) load from the stack slot.
11345 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11346 unsigned LoadedBytes = LoadedVT.getStoreSize();
11347 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Number of register-sized pieces needed to cover LoadedBytes (rounded up).
11348 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11349
11350 // Make sure the stack slot is also aligned for the register type.
11351 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11352 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
// NOTE(review): the declaration of `Stores` (listing line 11353) is absent
// from this extract; it is used below as a list of store nodes.
11354 SDValue StackPtr = StackBase;
11355 unsigned Offset = 0;
11356
11357 EVT PtrVT = Ptr.getValueType();
11358 EVT StackPtrVT = StackPtr.getValueType();
11359
11360 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11361 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11362
11363 // Do all but one copies using the full register width.
11364 for (unsigned i = 1; i < NumRegs; i++) {
11365 // Load one integer register's worth from the original location.
11366 SDValue Load = DAG.getLoad(
11367 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11368 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11369 // Follow the load with a store to the stack slot. Remember the store.
11370 Stores.push_back(DAG.getStore(
11371 Load.getValue(1), dl, Load, StackPtr,
11372 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11373 // Increment the pointers.
11374 Offset += RegBytes;
11375
11376 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11377 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11378 }
11379
11380 // The last copy may be partial. Do an extending load.
// MemVT is an integer type covering exactly the bytes not yet copied.
11381 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11382 8 * (LoadedBytes - Offset));
11383 SDValue Load = DAG.getExtLoad(
11384 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11385 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11386 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11387 // Follow the load with a store to the stack slot. Remember the store.
11388 // On big-endian machines this requires a truncating store to ensure
11389 // that the bits end up in the right place.
11390 Stores.push_back(DAG.getTruncStore(
11391 Load.getValue(1), dl, Load, StackPtr,
11392 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11393
11394 // The order of the stores doesn't matter - say it with a TokenFactor.
11395 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11396
11397 // Finally, perform the original load only redirected to the stack slot.
11398 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11399 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11400 LoadedVT);
11401
11402 // Hand back the reloaded value paired with the TokenFactor of the copy stores.
11403 return std::make_pair(Load, TF);
11404 }
11405
11406 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11407 "Unaligned load of unsupported type.");
11408
11409 // Compute the new VT that is half the size of the old one. This is an
11410 // integer MVT.
11411 unsigned NumBits = LoadedVT.getSizeInBits();
11412 EVT NewLoadedVT;
11413 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
// From here on NumBits is the width of one half.
11414 NumBits >>= 1;
11415
11416 Align Alignment = LD->getBaseAlign();
// Byte distance between the two half-width accesses.
11417 unsigned IncrementSize = NumBits / 8;
11418 ISD::LoadExtType HiExtType = LD->getExtensionType();
11419
11420 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11421 if (HiExtType == ISD::NON_EXTLOAD)
11422 HiExtType = ISD::ZEXTLOAD;
11423
11424 // Load the value in two parts
// Lo is always zero-extended; Hi uses HiExtType. Which half sits at the
// lower address depends on the data layout's endianness. Both loads use the
// incoming Chain and are joined by a TokenFactor below.
11425 SDValue Lo, Hi;
11426 if (DAG.getDataLayout().isLittleEndian()) {
11427 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11428 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11429 LD->getAAInfo());
11430
11431 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11432 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11433 LD->getPointerInfo().getWithOffset(IncrementSize),
11434 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11435 LD->getAAInfo());
11436 } else {
11437 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11438 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11439 LD->getAAInfo());
11440
11441 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11442 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11443 LD->getPointerInfo().getWithOffset(IncrementSize),
11444 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11445 LD->getAAInfo());
11446 }
11447
11448 // aggregate the two parts
// Result = (Hi << NumBits) | Lo
11449 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11450 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11451 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11452
11453 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11454 Hi.getValue(1));
11455
11456 return std::make_pair(Result, TF);
11457}
11458
11460 SelectionDAG &DAG) const {
// Expand the unaligned, unindexed store ST into other memory operations and
// return the resulting chain value. Three strategies:
//  - FP/vector memory type whose same-sized integer type is legal: either
//    scalarize the vector store, or bitcast the value to that integer type and
//    emit a (misaligned) integer store.
//  - Other FP/vector memory types: store to an aligned stack temporary, then
//    copy to the destination with register-sized integer loads/stores.
//  - Scalar integers: split the value into two halves and emit two truncating
//    stores.
// NOTE(review): the first line of the signature (listing line 11459) is absent
// from this extract; presumably this is
// SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ...) -- confirm.
11461 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11462 "unaligned indexed stores not implemented!");
11463 SDValue Chain = ST->getChain();
11464 SDValue Ptr = ST->getBasePtr();
11465 SDValue Val = ST->getValue();
11466 EVT VT = Val.getValueType();
11467 Align Alignment = ST->getBaseAlign();
11468 auto &MF = DAG.getMachineFunction();
11469 EVT StoreMemVT = ST->getMemoryVT();
11470
11471 SDLoc dl(ST);
11472 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
// Integer type with the same bit width as the stored value's type.
11473 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11474 if (isTypeLegal(intVT)) {
11475 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11476 StoreMemVT.isVector()) {
11477 // Scalarize the store and let the individual components be handled.
11478 SDValue Result = scalarizeVectorStore(ST, DAG);
11479 return Result;
11480 }
11481 // Expand to a bitconvert of the value to the integer type of the
11482 // same size, then a (misaligned) int store.
11483 // FIXME: Does not handle truncating floating point stores!
11484 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11485 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11486 Alignment, ST->getMemOperand()->getFlags());
11487 return Result;
11488 }
11489 // Do a (aligned) store to a stack slot, then copy from the stack slot
11490 // to the final destination using (unaligned) integer loads and stores.
11491 MVT RegVT = getRegisterType(
11492 *DAG.getContext(),
11493 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11494 EVT PtrVT = Ptr.getValueType();
11495 unsigned StoredBytes = StoreMemVT.getStoreSize();
11496 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Number of register-sized pieces needed to cover StoredBytes (rounded up).
11497 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11498
11499 // Make sure the stack slot is also aligned for the register type.
11500 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11501 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11502
11503 // Perform the original store, only redirected to the stack slot.
11504 SDValue Store = DAG.getTruncStore(
11505 Chain, dl, Val, StackPtr,
11506 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11507
11508 EVT StackPtrVT = StackPtr.getValueType();
11509
11510 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11511 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
// NOTE(review): the declaration of `Stores` (listing line 11512) is absent
// from this extract; it is used below as a list of store nodes.
11513 unsigned Offset = 0;
11514
11515 // Do all but one copies using the full register width.
11516 for (unsigned i = 1; i < NumRegs; i++) {
11517 // Load one integer register's worth from the stack slot.
// Each reload is chained on the stack store above.
11518 SDValue Load = DAG.getLoad(
11519 RegVT, dl, Store, StackPtr,
11520 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11521 // Store it to the final location. Remember the store.
11522 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11523 ST->getPointerInfo().getWithOffset(Offset),
11524 ST->getBaseAlign(),
11525 ST->getMemOperand()->getFlags()));
11526 // Increment the pointers.
11527 Offset += RegBytes;
11528 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11529 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11530 }
11531
11532 // The last store may be partial. Do a truncating store. On big-endian
11533 // machines this requires an extending load from the stack slot to ensure
11534 // that the bits are in the right place.
// LoadMemVT is an integer type covering exactly the bytes not yet copied.
11535 EVT LoadMemVT =
11536 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11537
11538 // Load from the stack slot.
11539 SDValue Load = DAG.getExtLoad(
11540 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11541 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11542
11543 Stores.push_back(DAG.getTruncStore(
11544 Load.getValue(1), dl, Load, Ptr,
11545 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11546 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11547 // The order of the stores doesn't matter - say it with a TokenFactor.
11548 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11549 return Result;
11550 }
11551
11552 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11553 "Unaligned store of unknown type.");
11554 // Get the half-size VT
11555 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11556 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
// Byte distance between the two half-width stores.
11557 unsigned IncrementSize = NumBits / 8;
11558
11559 // Divide the stored value in two parts.
11560 SDValue ShiftAmount =
11561 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11562 SDValue Lo = Val;
11563 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11564 // fold and not use the upper bits. A smaller constant may be easier to
11565 // materialize.
11566 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11567 Lo = DAG.getNode(
11568 ISD::AND, dl, VT, Lo,
11569 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11570 VT));
11571 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11572
11573 // Store the two parts
// The half written at the lower address is Lo on little-endian layouts and Hi
// otherwise. Both stores use the incoming Chain and are joined by a
// TokenFactor.
11574 SDValue Store1, Store2;
11575 Store1 = DAG.getTruncStore(Chain, dl,
11576 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11577 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11578 ST->getMemOperand()->getFlags());
11579
11580 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11581 Store2 = DAG.getTruncStore(
11582 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11583 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11584 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11585
11586 SDValue Result =
11587 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11588 return Result;
11589}
11590
// Return Addr advanced by an increment derived from DataVT:
//  - IsCompressedMemory: (number of set mask lanes) * (element size in bytes),
//    where the lane count is a VECREDUCE_ADD of the zero-extended mask for
//    scalable vectors and a CTPOP of the mask bitcast to an integer otherwise.
//  - otherwise: the store size of DataVT.
// NOTE(review): the line with the function name and the leading parameters
// (listing line 11592; Addr and Mask are used below) and the declaration of
// `Increment` (listing line 11596) are absent from this extract; presumably
// this is TargetLowering::IncrementMemoryAddress -- confirm.
11591SDValue
11593 const SDLoc &DL, EVT DataVT,
11594 SelectionDAG &DAG,
11595 bool IsCompressedMemory) const {
11597 EVT AddrVT = Addr.getValueType();
11598 EVT MaskVT = Mask.getValueType();
11599 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11600 "Incompatible types of Data and Mask");
11601 if (IsCompressedMemory) {
11602 // Incrementing the pointer according to number of '1's in the mask.
11603 if (DataVT.isScalableVector()) {
// Widen each mask lane to i32 and sum the lanes.
11604 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11605 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11606 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11607 } else {
// Reinterpret the whole mask as one integer and count its set bits.
11608 EVT MaskIntVT =
11609 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11610 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
// Masks narrower than 32 bits are zero-extended to i32 before the CTPOP.
11611 if (MaskIntVT.getSizeInBits() < 32) {
11612 MaskInIntReg =
11613 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11614 MaskIntVT = MVT::i32;
11615 }
11616 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11617 }
11618 // Scale is an element size in bytes.
11619 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11620 AddrVT);
11621 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11622 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11623 } else
11624 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11625
11626 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11627}
11628
11630 EVT VecVT, const SDLoc &dl,
11631 ElementCount SubEC) {
// Clamp Idx so that a sub-vector of SubEC elements starting at the returned
// index stays within a vector of type VecVT. The result has Idx's type.
// NOTE(review): the first line of the signature (listing line 11629, which
// declares DAG and Idx) is absent from this extract; the call site in this
// file names the function clampDynamicVectorIndex.
11632 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11633 "Cannot index a scalable vector within a fixed-width vector");
11634
11635 unsigned NElts = VecVT.getVectorMinNumElements();
11636 unsigned NumSubElts = SubEC.getKnownMinValue();
11637 EVT IdxVT = Idx.getValueType();
11638
// Fixed-length sub-vector inside a scalable vector.
11639 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11640 // If this is a constant index and we know the value plus the number of the
11641 // elements in the subvector minus one is less than the minimum number of
11642 // elements then it's safe to return Idx.
11643 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11644 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11645 return Idx;
// Otherwise clamp to (VS - NumSubElts), where VS is presumably
// vscale * NElts (confirm against getVScale). USUBSAT is used when
// NumSubElts may exceed NElts so the bound cannot wrap below zero.
11646 SDValue VS =
11647 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
11648 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11649 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11650 DAG.getConstant(NumSubElts, dl, IdxVT));
11651 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11652 }
// Single element in a power-of-two sized vector: mask the index to its low
// log2(NElts) bits instead of using UMIN.
11653 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11654 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11655 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11656 DAG.getConstant(Imm, dl, IdxVT));
11657 }
// General case: the largest valid start index is NElts - NumSubElts, or 0
// when the sub-vector is at least as large as the vector.
11658 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11659 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11660 DAG.getConstant(MaxIndex, dl, IdxVT));
11661}
11662
// Thin wrapper returning an SDValue: forwards DAG, VecPtr, VecVT, Index and
// PtrArithFlags (plus one argument not visible here) to another routine.
// NOTE(review): listing lines 11664 (function name and leading parameters),
// 11667 (callee name) and 11669 (one argument) are absent from this extract;
// presumably this is TargetLowering::getVectorElementPointer delegating to
// getVectorSubVecPointer with a one-element sub-vector type -- confirm.
11663SDValue
11665 EVT VecVT, SDValue Index,
11666 const SDNodeFlags PtrArithFlags) const {
11668 DAG, VecPtr, VecVT,
11670 Index, PtrArithFlags);
11671}
11672
// Compute the address of the sub-vector of type SubVecVT that starts at
// element Index of the in-memory vector of type VecVT located at VecPtr.
// Index is clamped via clampDynamicVectorIndex before it is scaled to bytes.
// PtrArithFlags is passed through to the final pointer addition.
// NOTE(review): the line with the function name and leading parameters
// (listing line 11674; DAG and VecPtr are used below) is absent from this
// extract; presumably this is TargetLowering::getVectorSubVecPointer -- confirm.
11673SDValue
11675 EVT VecVT, EVT SubVecVT, SDValue Index,
11676 const SDNodeFlags PtrArithFlags) const {
11677 SDLoc dl(Index);
11678 // Make sure the index type is big enough to compute in.
11679 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11680
11681 EVT EltVT = VecVT.getVectorElementType();
11682
11683 // Calculate the element offset and add it to the pointer.
11684 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11685 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11686 "Converting bits to bytes lost precision");
11687 assert(SubVecVT.getVectorElementType() == EltVT &&
11688 "Sub-vector must be a vector with matching element type");
11689 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11690 SubVecVT.getVectorElementCount());
11691
11692 EVT IdxVT = Index.getValueType();
// For a scalable sub-vector the index is additionally multiplied by a vscale
// node built with multiplier 1.
11693 if (SubVecVT.isScalableVector())
11694 Index =
11695 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11696 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11697
// Convert the element index into a byte offset.
11698 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11699 DAG.getConstant(EltSize, dl, IdxVT));
11700 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11701}
11702
11703//===----------------------------------------------------------------------===//
11704// Implementation of Emulated TLS Model
11705//===----------------------------------------------------------------------===//
11706
11708 SelectionDAG &DAG) const {
// Lower the TLS global address GA under the emulated TLS model and return the
// value produced by the generated call.
// NOTE(review): listing lines 11707 (function name and the GA parameter),
// 11717 (initializer of GV), 11728 (declaration of CLI) and 11735
// (declaration of MFI) are absent from this extract; presumably this is
// TargetLowering::LowerToTLSEmulatedModel -- confirm.
11709 // Access to address of TLS variable xyz is lowered to a function call:
11710 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11711 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11712 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11713 SDLoc dl(GA);
11714
11715 ArgListTy Args;
11716 const GlobalValue *GV =
// Look up the control variable "__emutls_v.<name>" in GV's module; it must
// already exist (asserted below).
11718 SmallString<32> NameString("__emutls_v.");
11719 NameString += GV->getName();
11720 StringRef EmuTlsVarName(NameString);
11721 const GlobalVariable *EmuTlsVar =
11722 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11723 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
// The sole call argument is the address of that control variable.
11724 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11725
11726 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11727
// Build a C-calling-convention libcall chained on the DAG entry node.
11729 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11730 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11731 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11732
11733 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11734 // At least for X86 targets, maybe good for other targets too?
11736 MFI.setAdjustsStack(true); // Is this only for X86 target?
11737 MFI.setHasCalls(true);
11738
11739 assert((GA->getOffset() == 0) &&
11740 "Emulated TLS must have zero offset in GlobalAddressSDNode");
// Only the call's result value is returned; its output chain is not used here.
11741 return CallResult.first;
11742}
11743
11745 SelectionDAG &DAG) const {
// Rewrite the SETCC node Op of the form (x == 0) as
//   trunc_i32(srl(ctlz(zext x), log2(bit width)))
// when isCtlzFast() holds. Returns an empty SDValue when CTLZ is not fast or
// when Op is not an equality comparison against the constant zero.
// NOTE(review): the first line of the signature (listing line 11744, which
// declares Op) is absent from this extract; presumably this is
// TargetLowering::lowerCmpEqZeroToCtlzSrl -- confirm.
11746 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11747 if (!isCtlzFast())
11748 return SDValue();
11749 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11750 SDLoc dl(Op);
11751 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11752 EVT VT = Op.getOperand(0).getValueType();
11753 SDValue Zext = Op.getOperand(0);
// Operands narrower than i32 are zero-extended to i32 first.
11754 if (VT.bitsLT(MVT::i32)) {
11755 VT = MVT::i32;
11756 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11757 }
// Shifting the leading-zero count right by log2(width) leaves 1 only when
// the count equals the full width, i.e. when the input is zero (assumes a
// power-of-two bit width and CTLZ(0) == width -- TODO confirm).
11758 unsigned Log2b = Log2_32(VT.getSizeInBits());
11759 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11760 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11761 DAG.getConstant(Log2b, dl, MVT::i32));
11762 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11763 }
11764 return SDValue();
11765}
11766
// Expand the integer min/max node Node (SMIN/SMAX/UMIN/UMAX) into other
// operations. In order it tries: the opposite-signedness opcode when both sign
// bits are known zero, arithmetic forms for UMAX/UMIN, unrolling (under a
// condition missing from this extract), and finally a SETCC + select,
// preferring a SETCC node that already exists in the DAG.
// NOTE(review): listing lines 11767 (signature), 11783, 11793 and 11801 (tails
// of the three `if` conditions below) and 11808 (the condition guarding
// UnrollVectorOp) are absent from this extract; presumably this is
// TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) -- confirm.
11768 SDValue Op0 = Node->getOperand(0);
11769 SDValue Op1 = Node->getOperand(1);
11770 EVT VT = Op0.getValueType();
11771 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11772 unsigned Opcode = Node->getOpcode();
11773 SDLoc DL(Node);
11774
11775 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11776 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11777 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11778 DAG.SignBitIsZero(Op1))
11779 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11780
11781 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11782 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// Op0 is used twice below, so it is frozen first.
11784 Op0 = DAG.getFreeze(Op0);
11785 SDValue Zero = DAG.getConstant(0, DL, VT);
11786 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11787 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11788 }
11789
11790 // umin(x,y) -> sub(x,usubsat(x,y))
11791 // TODO: Missing freeze(Op0)?
11792 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11794 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11795 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11796 }
11797
11798 // umax(x,y) -> add(x,usubsat(y,x))
11799 // TODO: Missing freeze(Op0)?
11800 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11802 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11803 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11804 }
11805
11806 // FIXME: Should really try to split the vector in case it's legal on a
11807 // subvector.
11809 return DAG.UnrollVectorOp(Node);
11810
11811 // Attempt to find an existing SETCC node that we can reuse.
11812 // TODO: Do we need a generic doesSETCCNodeExist?
11813 // TODO: Missing freeze(Op0)/freeze(Op1)?
// buildMinMax: PrefCC/AltCC select (Op0, Op1); PrefCommuteCC/AltCommuteCC are
// the conditions under which the select operands are swapped to (Op1, Op0).
// All lookups compare (Op0, Op1) in that order. If no matching SETCC exists,
// a new one is created with PrefCC.
11814 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11815 ISD::CondCode PrefCommuteCC,
11816 ISD::CondCode AltCommuteCC) {
11817 SDVTList BoolVTList = DAG.getVTList(BoolVT);
11818 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11819 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11820 {Op0, Op1, DAG.getCondCode(CC)})) {
11821 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11822 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11823 }
11824 }
11825 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11826 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11827 {Op0, Op1, DAG.getCondCode(CC)})) {
11828 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11829 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11830 }
11831 }
11832 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11833 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11834 };
11835
11836 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11837 // -> Y = (A < B) ? B : A
11838 // -> Y = (A >= B) ? A : B
11839 // -> Y = (A <= B) ? B : A
11840 switch (Opcode) {
11841 case ISD::SMAX:
11842 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11843 case ISD::SMIN:
11844 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11845 case ISD::UMAX:
11846 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11847 case ISD::UMIN:
11848 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11849 }
11850
11851 llvm_unreachable("How did we get here?");
11852}
11853
// Expand the saturating add/sub node Node (SADDSAT/UADDSAT/SSUBSAT/USUBSAT)
// into other operations and return the replacement value. UMAX/UMIN-based
// forms and a compare-based form for usub.sat(a, 1) are tried first;
// otherwise the matching overflow opcode ([SU]ADDO / [SU]SUBO) is emitted and
// the saturated value is chosen from its overflow flag.
// NOTE(review): listing lines 11854 (signature), 11873 (tail of the
// usub.sat(a, 1) condition), 11912 (condition guarding UnrollVectorOp) and
// 11924/11934 (inner `if` conditions guarding the mask-based forms) are absent
// from this extract; presumably this is
// TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) -- confirm.
11855 unsigned Opcode = Node->getOpcode();
11856 SDValue LHS = Node->getOperand(0);
11857 SDValue RHS = Node->getOperand(1);
11858 EVT VT = LHS.getValueType();
11859 SDLoc dl(Node);
11860
11861 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11862 assert(VT.isInteger() && "Expected operands to be integers");
11863
11864 // usub.sat(a, b) -> umax(a, b) - b
11865 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11866 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11867 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11868 }
11869
11870 // usub.sat(a, 1) -> sub(a, zext(a != 0))
11871 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
11872 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
// LHS is used twice below, so it is frozen first.
11874 LHS = DAG.getFreeze(LHS);
11875 SDValue Zero = DAG.getConstant(0, dl, VT);
11876 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11877 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11878 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Keep only bit 0 so the subtrahend is exactly 0 or 1.
11879 Subtrahend =
11880 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11881 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11882 }
11883
11884 // uadd.sat(a, b) -> umin(a, ~b) + b
11885 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11886 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11887 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11888 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11889 }
11890
// Map the saturating opcode to the overflow-reporting opcode of the same
// signedness and operation.
11891 unsigned OverflowOp;
11892 switch (Opcode) {
11893 case ISD::SADDSAT:
11894 OverflowOp = ISD::SADDO;
11895 break;
11896 case ISD::UADDSAT:
11897 OverflowOp = ISD::UADDO;
11898 break;
11899 case ISD::SSUBSAT:
11900 OverflowOp = ISD::SSUBO;
11901 break;
11902 case ISD::USUBSAT:
11903 OverflowOp = ISD::USUBO;
11904 break;
11905 default:
11906 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11907 "addition or subtraction node.");
11908 }
11909
11910 // FIXME: Should really try to split the vector in case it's legal on a
11911 // subvector.
11913 return DAG.UnrollVectorOp(Node);
11914
11915 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11916 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Result value 0 is the wrapped sum/difference, value 1 the overflow flag.
11917 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11918 SDValue SumDiff = Result.getValue(0);
11919 SDValue Overflow = Result.getValue(1);
11920 SDValue Zero = DAG.getConstant(0, dl, VT);
11921 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11922
11923 if (Opcode == ISD::UADDSAT) {
11925 // (LHS + RHS) | OverflowMask
11926 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11927 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11928 }
11929 // Overflow ? 0xffff.... : (LHS + RHS)
11930 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11931 }
11932
11933 if (Opcode == ISD::USUBSAT) {
11935 // (LHS - RHS) & ~OverflowMask
11936 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11937 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11938 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11939 }
11940 // Overflow ? 0 : (LHS - RHS)
11941 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11942 }
11943
11944 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11945 "Expected signed saturating add/sub opcode");
11946
11947 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11948 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11949
11950 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11951 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11952
11953 // If either of the operand signs are known, then they are guaranteed to
11954 // only saturate in one direction. If non-negative they will saturate
11955 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11956 //
11957 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11958 // sign of 'y' has to be flipped.
11959
11960 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11961 bool RHSIsNonNegative =
11962 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11963 if (LHSIsNonNegative || RHSIsNonNegative) {
11964 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11965 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11966 }
11967
11968 bool LHSIsNegative = KnownLHS.isNegative();
11969 bool RHSIsNegative =
11970 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11971 if (LHSIsNegative || RHSIsNegative) {
11972 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11973 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11974 }
11975
11976 // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
11977 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11978 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11979 DAG.getConstant(BitWidth - 1, dl, VT));
11980 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11981 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11982}
11983
// Expand the three-way comparison node Node (unsigned predicates for
// ISD::UCMP, signed ones otherwise) into two SETCCs combined either by a pair
// of selects producing -1 / 0 / 1 or by a subtraction of the two boolean
// results.
// NOTE(review): listing lines 11984 (signature), 12004 and 12006 (the opening
// and the tail of the `if` condition choosing the select form) and 12014 (the
// condition guarding the swap) are absent from this extract; presumably this
// is TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) -- confirm.
11985 unsigned Opcode = Node->getOpcode();
11986 SDValue LHS = Node->getOperand(0);
11987 SDValue RHS = Node->getOperand(1);
11988 EVT VT = LHS.getValueType();
11989 EVT ResVT = Node->getValueType(0);
11990 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11991 SDLoc dl(Node);
11992
11993 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11994 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11995 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11996 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11997
11998 // We can't perform arithmetic on i1 values. Extending them would
11999 // probably result in worse codegen, so let's just use two selects instead.
12000 // Some targets are also just better off using selects rather than subtraction
12001 // because one of the conditions can be merged with one of the selects.
12002 // And finally, if we don't know the contents of high bits of a boolean value
12003 // we can't perform any arithmetic either.
12005 BoolVT.getScalarSizeInBits() == 1 ||
// IsLT ? -1 : (IsGT ? 1 : 0)
12007 SDValue SelectZeroOrOne =
12008 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
12009 DAG.getConstant(0, dl, ResVT));
12010 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
12011 SelectZeroOrOne);
12012 }
12013
// Arithmetic form: (IsGT - IsLT) in BoolVT, sign-extended or truncated to
// ResVT. The operands are swapped under the (missing) condition above;
// presumably when true booleans are all-ones rather than 1 -- confirm.
12015 std::swap(IsGT, IsLT);
12016 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
12017 ResVT);
12018}
12019
// Expand the saturating left-shift node Node (SSHLSAT or USHLSAT): perform a
// plain SHL, shift the result back, and if that does not reproduce LHS select
// the saturation value instead of the shifted result.
// NOTE(review): listing lines 12020 (signature) and 12033 (the condition
// guarding UnrollVectorOp) are absent from this extract; presumably this is
// TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) -- confirm.
12021 unsigned Opcode = Node->getOpcode();
12022 bool IsSigned = Opcode == ISD::SSHLSAT;
12023 SDValue LHS = Node->getOperand(0);
12024 SDValue RHS = Node->getOperand(1);
12025 EVT VT = LHS.getValueType();
12026 SDLoc dl(Node);
12027
12028 assert((Node->getOpcode() == ISD::SSHLSAT ||
12029 Node->getOpcode() == ISD::USHLSAT) &&
12030 "Expected a SHLSAT opcode");
12031 assert(VT.isInteger() && "Expected operands to be integers");
12032
12034 return DAG.UnrollVectorOp(Node);
12035
12036 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12037
12038 unsigned BW = VT.getScalarSizeInBits();
12039 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12040 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Shift back with SRA for the signed form and SRL for the unsigned form.
12041 SDValue Orig =
12042 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12043
// Saturation value: signed -> SignedMin if LHS < 0 else SignedMax;
// unsigned -> the maximum unsigned value.
12044 SDValue SatVal;
12045 if (IsSigned) {
12046 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12047 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12048 SDValue Cond =
12049 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12050 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12051 } else {
12052 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12053 }
12054 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12055 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12056}
12057
12059 bool Signed, SDValue &Lo, SDValue &Hi,
12060 SDValue LHS, SDValue RHS,
12061 SDValue HiLHS, SDValue HiRHS) const {
// Compute the double-width product of LHS and RHS using only operations on
// VT, writing the low half to Lo and the high half to Hi. Each operand is
// split into half-width pieces and the partial products are accumulated.
// HiLHS/HiRHS must be both set or both unset; when set, their cross products
// with the opposite low operand are added into Hi. Signed may only be set
// when HiLHS/HiRHS are unset (see the asserts below).
// NOTE(review): the first line of the signature (listing line 12058, which
// declares DAG and dl) is absent from this extract; the call site in this
// file names the function forceExpandMultiply.
12062 EVT VT = LHS.getValueType();
12063 assert(RHS.getValueType() == VT && "Mismatching operand types");
12064
12065 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
// NOTE(review): "RiRHS" in the message below is presumably a typo for "HiRHS".
12066 assert((!Signed || !HiLHS) &&
12067 "Signed flag should only be set when HiLHS and RiRHS are null");
12068
12069 // We'll expand the multiplication by brute force because we have no other
12070 // options. This is a trivially-generalized version of the code from
12071 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12072 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12073 // sign bits while calculating the Hi half.
12074 unsigned Bits = VT.getSizeInBits();
12075 unsigned HalfBits = Bits / 2;
// LL / RL: low halves of LHS / RHS.
12076 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12077 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12078 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12079
// T = LL * RL, split into its low (TL) and high (TH) halves.
12080 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12081 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12082
12083 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12084 // This is always an unsigned shift.
12085 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12086
// LH / RH: high halves of LHS / RHS (sign-propagating when Signed).
12087 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12088 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12089 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12090
// U = LH * RL + TH
12091 SDValue U =
12092 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12093 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12094 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12095
// V = LL * RH + UL
12096 SDValue V =
12097 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12098 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12099
// Lo = TL + (V << HalfBits)
12100 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12101 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12102
// Hi = LH * RH + (UH + VH)
12103 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12104 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12105
12106 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12107 // the products to Hi.
12108 if (HiLHS) {
12109 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12110 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12111 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12112 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12113 }
12114}
12115
12117 bool Signed, const SDValue LHS,
12118 const SDValue RHS, SDValue &Lo,
12119 SDValue &Hi) const {
      // Multiplies LHS by RHS at the widened type WideVT and hands the two halves
      // of the product back through Lo and Hi. A MUL_I16..MUL_I128 runtime
      // library call is used when getLibcallImpl reports an implementation for
      // it; otherwise the work is delegated to forceExpandMultiply.
      // NOTE(review): several embedded listing lines (12116, 12156, 12159, 12170)
      // are absent from this listing, so the first signature line, the
      // declaration of CallOptions, the condition of the if/else around the two
      // makeLibCall calls and the head of the assert are not visible here.
12120 EVT VT = LHS.getValueType();
12121 assert(RHS.getValueType() == VT && "Mismatching operand types");
12122 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12123 // We can fall back to a libcall with an illegal type for the MUL if we
12124 // have a libcall big enough.
12125 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12126 if (WideVT == MVT::i16)
12127 LC = RTLIB::MUL_I16;
12128 else if (WideVT == MVT::i32)
12129 LC = RTLIB::MUL_I32;
12130 else if (WideVT == MVT::i64)
12131 LC = RTLIB::MUL_I64;
12132 else if (WideVT == MVT::i128)
12133 LC = RTLIB::MUL_I128;
12134
      // No library implementation is available for this width: expand the
      // multiplication inline instead.
12135 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12136 if (LibcallImpl == RTLIB::Unsupported) {
12137 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12138 return;
12139 }
12140
      // Build the high halves that extend each operand to WideVT for the call.
12141 SDValue HiLHS, HiRHS;
12142 if (Signed) {
12143 // The high part is obtained by SRA'ing all but one of the bits of low
12144 // part.
12145 unsigned LoSize = VT.getFixedSizeInBits();
12146 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12147 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12148 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12149 } else {
      // Unsigned operands are extended with an all-zero high part.
12150 HiLHS = DAG.getConstant(0, dl, VT);
12151 HiRHS = DAG.getConstant(0, dl, VT);
12152 }
12153
12154 // Attempt a libcall.
12155 SDValue Ret;
12157 CallOptions.setIsSigned(Signed);
12158 CallOptions.setIsPostTypeLegalization(true);
12160 // Halves of WideVT are packed into registers in different order
12161 // depending on platform endianness. This is usually handled by
12162 // the C calling convention, but we can't defer to it in
12163 // the legalizer.
      // This branch passes each operand as (low, high).
12164 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12165 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12166 } else {
      // This branch passes each operand as (high, low).
12167 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12168 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12169 }
12171 "Ret value is a collection of constituent nodes holding result.");
      // Pick the low and high halves out of the returned node; which operand is
      // which depends on the data layout's endianness.
12172 if (DAG.getDataLayout().isLittleEndian()) {
12173 // Same as above.
12174 Lo = Ret.getOperand(0);
12175 Hi = Ret.getOperand(1);
12176 } else {
12177 Lo = Ret.getOperand(1);
12178 Hi = Ret.getOperand(0);
12179 }
12180}
12181
/// Expands a fixed-point multiplication node (SMULFIX, UMULFIX, SMULFIXSAT or
/// UMULFIXSAT) into simpler DAG nodes. Operands 0 and 1 of Node are the
/// factors and operand 2 is the constant scale. The full product is formed as
/// a Lo/Hi pair, shifted right by the scale with a funnel shift and, for the
/// saturating opcodes, clamped to the representable range. Returns an empty
/// SDValue when a vector type reaches the point where only the scalar fallback
/// remains.
/// NOTE(review): embedded listing lines 12183 (rest of the signature) and 12205
/// are absent from this listing.
12182SDValue
12184 assert((Node->getOpcode() == ISD::SMULFIX ||
12185 Node->getOpcode() == ISD::UMULFIX ||
12186 Node->getOpcode() == ISD::SMULFIXSAT ||
12187 Node->getOpcode() == ISD::UMULFIXSAT) &&
12188 "Expected a fixed point multiplication opcode");
12189
12190 SDLoc dl(Node);
12191 SDValue LHS = Node->getOperand(0);
12192 SDValue RHS = Node->getOperand(1);
12193 EVT VT = LHS.getValueType();
12194 unsigned Scale = Node->getConstantOperandVal(2);
12195 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12196 Node->getOpcode() == ISD::UMULFIXSAT);
12197 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12198 Node->getOpcode() == ISD::SMULFIXSAT);
12199 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12200 unsigned VTSize = VT.getScalarSizeInBits();
12201
12202 if (!Scale) {
12203 // [us]mul.fix(a, b, 0) -> mul(a, b)
12204 if (!Saturating) {
      // NOTE(review): embedded line 12205 is missing here, so any guard on this
      // return is not visible in this listing.
12206 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12207 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Signed saturating multiply with scale 0: multiply with SMULO and
      // replace the product by SatMin or SatMax when the overflow bit is set.
12208 SDValue Result =
12209 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12210 SDValue Product = Result.getValue(0);
12211 SDValue Overflow = Result.getValue(1);
12212 SDValue Zero = DAG.getConstant(0, dl, VT);
12213
12214 APInt MinVal = APInt::getSignedMinValue(VTSize);
12215 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12216 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12217 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12218 // Xor the inputs, if resulting sign bit is 0 the product will be
12219 // positive, else negative.
12220 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12221 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
12222 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12223 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12224 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Unsigned saturating multiply with scale 0: multiply with UMULO and
      // clamp to the maximum value when the overflow bit is set.
12225 SDValue Result =
12226 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12227 SDValue Product = Result.getValue(0);
12228 SDValue Overflow = Result.getValue(1);
12229
12230 APInt MaxVal = APInt::getMaxValue(VTSize);
12231 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12232 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12233 }
12234 }
12235
12236 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12237 "Expected scale to be less than the number of bits if signed or at "
12238 "most the number of bits if unsigned.");
12239 assert(LHS.getValueType() == RHS.getValueType() &&
12240 "Expected both operands to be the same type");
12241
12242 // Get the upper and lower bits of the result.
      // Tried in order: a legal or custom [SU]MUL_LOHI, then MUL plus MULH[SU],
      // then a MUL on the widened type, and finally (scalars only) the generic
      // forceExpandWideMUL expansion.
12243 SDValue Lo, Hi;
12244 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12245 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12246 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12247 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12248 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12249 Lo = Result.getValue(0);
12250 Hi = Result.getValue(1);
12251 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12252 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12253 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12254 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12255 // Try for a multiplication using a wider type.
12256 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12257 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12258 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12259 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12260 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
12261 SDValue Shifted =
12262 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12263 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12264 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12265 } else if (VT.isVector()) {
      // The remaining fallback is not used for vectors; return an empty
      // SDValue instead.
12266 return SDValue();
12267 } else {
12268 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12269 }
12270
12271 if (Scale == VTSize)
12272 // Result is just the top half since we'd be shifting by the width of the
12273 // operand. Overflow impossible so this works for both UMULFIX and
12274 // UMULFIXSAT.
12275 return Hi;
12276
12277 // The result will need to be shifted right by the scale since both operands
12278 // are scaled. The result is given to us in 2 halves, so we only want part of
12279 // both in the result.
12280 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12281 DAG.getShiftAmountConstant(Scale, VT, dl));
12282 if (!Saturating)
12283 return Result;
12284
12285 if (!Signed) {
12286 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12287 // widened multiplication) aren't all zeroes.
12288
12289 // Saturate to max if ((Hi >> Scale) != 0),
12290 // which is the same as if (Hi > ((1 << Scale) - 1))
12291 APInt MaxVal = APInt::getMaxValue(VTSize);
12292 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12293 dl, VT);
12294 Result = DAG.getSelectCC(dl, Hi, LowMask,
12295 DAG.getConstant(MaxVal, dl, VT), Result,
12296 ISD::SETUGT);
12297
12298 return Result;
12299 }
12300
12301 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12302 // widened multiplication) aren't all ones or all zeroes.
12303
12304 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12305 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12306
12307 if (Scale == 0) {
      // With no scaling, overflow occurred exactly when Hi differs from the
      // sign-extension of Lo.
12308 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12309 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12310 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12311 // Saturated to SatMin if wide product is negative, and SatMax if wide
12312 // product is positive ...
12313 SDValue Zero = DAG.getConstant(0, dl, VT);
12314 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12315 ISD::SETLT);
12316 // ... but only if we overflowed.
12317 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12318 }
12319
12320 // We handled Scale==0 above so all the bits to examine are in Hi.
12321
12322 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12323 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12324 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12325 dl, VT);
12326 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12327 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
12328 // which is the same as if (Hi < (-1 << (Scale - 1)))
12329 SDValue HighMask =
12330 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12331 dl, VT);
12332 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12333 return Result;
12334}
12335
/// Expands a fixed-point division (SDIVFIX, SDIVFIXSAT, UDIVFIX or UDIVFIXSAT)
/// of LHS by RHS with the given Scale into an ordinary integer division
/// carried out in the operands' own type. This is only possible when the
/// known leading bits of LHS and trailing zeroes of RHS leave enough headroom
/// to absorb the scaling; otherwise an empty SDValue is returned. For signed
/// opcodes the quotient is adjusted so that it rounds towards negative
/// infinity.
/// NOTE(review): embedded listing lines 12337 (rest of the signature), 12355
/// (the unsigned arm of the LHSLead expression) and 12393 (the second half of
/// the SDIVREM condition) are absent from this listing.
12336SDValue
12338 SDValue LHS, SDValue RHS,
12339 unsigned Scale, SelectionDAG &DAG) const {
12340 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12341 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12342 "Expected a fixed point division opcode");
12343
12344 EVT VT = LHS.getValueType();
12345 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12346 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12347 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12348
12349 // If there is enough room in the type to upscale the LHS or downscale the
12350 // RHS before the division, we can perform it in this type without having to
12351 // resize. For signed operations, the LHS headroom is the number of
12352 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12353 // The headroom for the RHS is the number of trailing zeroes.
12354 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
12356 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12357
12358 // For signed saturating operations, we need to be able to detect true integer
12359 // division overflow; that is, when you have MIN / -EPS. However, this
12360 // is undefined behavior and if we emit divisions that could take such
12361 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12362 // example).
12363 // Avoid this by requiring an extra bit so that we never get this case.
12364 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12365 // signed saturating division, we need to emit a whopping 32-bit division.
12366 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12367 return SDValue();
12368
      // Take as much of the scaling as possible on the LHS; the RHS absorbs the
      // remainder.
12369 unsigned LHSShift = std::min(LHSLead, Scale);
12370 unsigned RHSShift = Scale - LHSShift;
12371
12372 // At this point, we know that if we shift the LHS up by LHSShift and the
12373 // RHS down by RHSShift, we can emit a regular division with a final scaling
12374 // factor of Scale.
12375
12376 if (LHSShift)
12377 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12378 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12379 if (RHSShift)
12380 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12381 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12382
12383 SDValue Quot;
12384 if (Signed) {
12385 // For signed operations, if the resulting quotient is negative and the
12386 // remainder is nonzero, subtract 1 from the quotient to round towards
12387 // negative infinity.
12388 SDValue Rem;
12389 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12390 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12391 // we couldn't just form a libcall, but the type legalizer doesn't do it.
12392 if (isTypeLegal(VT) &&
12394 Quot = DAG.getNode(ISD::SDIVREM, dl,
12395 DAG.getVTList(VT, VT),
12396 LHS, RHS);
12397 Rem = Quot.getValue(1);
12398 Quot = Quot.getValue(0);
12399 } else {
12400 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12401 LHS, RHS);
12402 Rem = DAG.getNode(ISD::SREM, dl, VT,
12403 LHS, RHS);
12404 }
      // The quotient is negative exactly when the operand signs differ; in that
      // case a nonzero remainder means the quotient must be decremented.
12405 SDValue Zero = DAG.getConstant(0, dl, VT);
12406 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12407 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12408 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
12409 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12410 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12411 DAG.getConstant(1, dl, VT));
12412 Quot = DAG.getSelect(dl, VT,
12413 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12414 Sub1, Quot);
12415 } else
12416 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12417 LHS, RHS);
12418
12419 return Quot;
12420}
12421
12423 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
      // Expands an unsigned add/subtract-with-overflow node (an add when the
      // opcode is UADDO, a subtract otherwise). Result receives the sum or
      // difference of the node's two operands; Overflow receives the carry or
      // borrow flag in the node's second result type.
      // NOTE(review): embedded listing line 12422 (start of the signature) is
      // absent from this listing.
12424 SDLoc dl(Node);
12425 SDValue LHS = Node->getOperand(0);
12426 SDValue RHS = Node->getOperand(1);
12427 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12428
12429 // If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
      // The carry-in is a constant zero, so both results are taken directly
      // from the carry node.
12430 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12431 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
12432 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12433 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12434 { LHS, RHS, CarryIn });
12435 Result = SDValue(NodeCarry.getNode(), 0);
12436 Overflow = SDValue(NodeCarry.getNode(), 1);
12437 return;
12438 }
12439
12440 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12441 LHS.getValueType(), LHS, RHS);
12442
12443 EVT ResultType = Node->getValueType(1);
12444 EVT SetCCType = getSetCCResultType(
12445 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12446 SDValue SetCC;
12447 if (IsAdd && isOneConstant(RHS)) {
12448 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
12449 // the live range of X. We assume comparing with 0 is cheap.
12450 // The general case (X + C) < C is not necessarily beneficial. Although we
12451 // reduce the live range of X, we may introduce the materialization of
12452 // constant C.
12453 SetCC =
12454 DAG.getSetCC(dl, SetCCType, Result,
12455 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12456 } else if (IsAdd && isAllOnesConstant(RHS)) {
12457 // Special case: uaddo X, -1 overflows if X != 0.
12458 SetCC =
12459 DAG.getSetCC(dl, SetCCType, LHS,
12460 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12461 } else {
      // General case: an addition wrapped if the result is (unsigned) below
      // LHS; a subtraction borrowed if the result is (unsigned) above LHS.
12462 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12463 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12464 }
      // Convert the comparison result to the node's overflow result type.
12465 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12466}
12467
12469 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
      // Expands a signed add/subtract-with-overflow node (an add when the
      // opcode is SADDO, a subtract otherwise). Result receives the plain ADD
      // or SUB of the node's two operands; Overflow receives the signed
      // overflow flag in the node's second result type.
      // NOTE(review): embedded listing line 12468 (start of the signature) is
      // absent from this listing.
12470 SDLoc dl(Node);
12471 SDValue LHS = Node->getOperand(0);
12472 SDValue RHS = Node->getOperand(1);
12473 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12474
12475 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12476 LHS.getValueType(), LHS, RHS);
12477
12478 EVT ResultType = Node->getValueType(1);
12479 EVT OType = getSetCCResultType(
12480 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12481
12482 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
      // The wrapping and saturating results differ exactly when the operation
      // overflowed.
12483 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12484 if (isOperationLegal(OpcSat, LHS.getValueType())) {
12485 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12486 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12487 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12488 return;
12489 }
12490
12491 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12492
12493 if (IsAdd) {
12494 // For an addition, the result should be less than one of the operands (LHS)
12495 // if and only if the other operand (RHS) is negative, otherwise there will
12496 // be overflow.
12497 SDValue ResultLowerThanLHS =
12498 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12499 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
12500 Overflow = DAG.getBoolExtOrTrunc(
12501 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
12502 ResultType, ResultType);
12503 } else {
12504 // For subtraction, overflow occurs when the signed comparison of operands
12505 // doesn't match the sign of the result.
12506 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
12507 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
12508 Overflow = DAG.getBoolExtOrTrunc(
12509 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
12510 ResultType, ResultType);
12511 }
12512}
12513
12515 SDValue &Overflow, SelectionDAG &DAG) const {
      // Expands a multiply-with-overflow node (signed when the opcode is SMULO,
      // unsigned otherwise). Result receives the low half of the product and
      // Overflow a flag that is set when the high half is not the sign
      // extension (signed) or zero (unsigned) of the low half. Returns true on
      // success; returns false only when a vector type reaches the final
      // fallback.
      // NOTE(review): embedded listing line 12514 (start of the signature) is
      // absent from this listing.
12516 SDLoc dl(Node);
12517 EVT VT = Node->getValueType(0);
12518 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12519 SDValue LHS = Node->getOperand(0);
12520 SDValue RHS = Node->getOperand(1);
12521 bool isSigned = Node->getOpcode() == ISD::SMULO;
12522
12523 // For power-of-two multiplications we can use a simpler shift expansion.
12524 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12525 const APInt &C = RHSC->getAPIntValue();
12526 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12527 if (C.isPowerOf2()) {
12528 // smulo(x, signed_min) is same as umulo(x, signed_min).
12529 bool UseArithShift = isSigned && !C.isMinSignedValue();
12530 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12531 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
12532 Overflow = DAG.getSetCC(dl, SetCCVT,
12533 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12534 dl, VT, Result, ShiftAmt),
12535 LHS, ISD::SETNE);
12536 return true;
12537 }
12538 }
12539
12540 SDValue BottomHalf;
12541 SDValue TopHalf;
12542 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12543
      // NOTE(review): the initializer of Ops (embedded lines 12545-12546) is
      // absent from this listing. From the uses below, column 0 is used as a
      // two-result multiply, column 1 as a high-half multiply and column 2 as
      // the extension to WideVT; the row is selected by isSigned.
12544 static const unsigned Ops[2][3] =
12547 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12548 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12549 RHS);
12550 TopHalf = BottomHalf.getValue(1);
12551 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12552 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12553 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12554 } else if (isTypeLegal(WideVT)) {
      // Multiply in the wider type and split the product into two VT-sized
      // halves.
12555 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12556 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12557 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12558 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12559 SDValue ShiftAmt =
12560 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12561 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12562 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12563 } else {
      // Last resort: the generic wide-multiply expansion, which is only used
      // for non-vector types here.
12564 if (VT.isVector())
12565 return false;
12566
12567 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12568 }
12569
12570 Result = BottomHalf;
12571 if (isSigned) {
      // Signed: no overflow iff the top half equals the sign bit of the bottom
      // half replicated across every bit.
12572 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12573 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12574 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12575 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12576 } else {
      // Unsigned: no overflow iff the top half is zero.
12577 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12578 DAG.getConstant(0, dl, VT), ISD::SETNE);
12579 }
12580
12581 // Truncate the result if SetCC returns a larger type than needed.
12582 EVT RType = Node->getValueType(1);
12583 if (RType.bitsLT(Overflow.getValueType()))
12584 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12585
12586 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12587 "Unexpected result type for S/UMULO legalization");
12588 return true;
12589}
12590
      // Expands a vector reduction node into operations on its base opcode (as
      // given by ISD::getVecReduceBaseOpcode). Power-of-two vectors are first
      // split into halves that are combined with the base opcode; whatever
      // vector remains is then reduced one element at a time. The value
      // returned has the node's result type.
      // NOTE(review): several embedded listing lines are absent from this
      // listing (12591 signature, 12603-12604 and 12610-12613 inside the
      // switch, 12618 ahead of the HalfVT computation, 12654 and 12660). In
      // particular the loop that the `continue`/`break` statements below belong
      // to, the statement guarded by the scalable-vector check and the
      // declaration of Ops are not visible here.
12592 SDLoc dl(Node);
12593 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12594 SDValue Op = Node->getOperand(0);
12595 SDNodeFlags Flags = Node->getFlags();
12596 EVT VT = Op.getValueType();
12597
12598 // Try to use a shuffle reduction for power of two vectors.
12599 if (VT.isPow2VectorType()) {
12600 // See if the reduction opcode is safe to use with widened types.
12601 bool WidenSrc = false;
12602 switch (Node->getOpcode()) {
12605 case ISD::VECREDUCE_ADD:
12606 case ISD::VECREDUCE_MUL:
12607 case ISD::VECREDUCE_AND:
12608 case ISD::VECREDUCE_OR:
12609 case ISD::VECREDUCE_XOR:
12614 WidenSrc = VT.isFixedLengthVector();
12615 break;
12616 }
12617
12619 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12620 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
12621 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
12622 // Attempt to widen the source vectors to a legal op.
12623 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
12624 if (WideVT.isVector() &&
12625 WideVT.getScalarType() == HalfVT.getScalarType() &&
12626 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
12627 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
      // Place each half at index 0 of a poison WideVT vector, combine the two
      // at WideVT, then extract the HalfVT-sized result from index 0.
12628 SDValue Lo, Hi;
12629 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12630 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
12631 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
12632 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
12633 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
12634 VT = HalfVT;
12635 continue;
12636 }
12637 }
12638 break;
12639 }
12640
      // The base opcode is legal or custom on the half-width type: combine the
      // two halves directly.
12641 SDValue Lo, Hi;
12642 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12643 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
12644 VT = HalfVT;
12645
12646 // Stop if splitting is enough to make the reduction legal.
12647 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12648 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12649 Flags);
12650 }
12651 }
12652
12653 if (VT.isScalableVector())
12655 "Expanding reductions for scalable vectors is undefined.");
12656
12657 EVT EltVT = VT.getVectorElementType();
12658 unsigned NumElts = VT.getVectorNumElements();
12659
12661 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12662
      // Fold the remaining elements together from left to right.
12663 SDValue Res = Ops[0];
12664 for (unsigned i = 1; i < NumElts; i++)
12665 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12666
12667 // Result type may be wider than element type.
12668 if (EltVT != Node->getValueType(0))
12669 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12670 return Res;
12671}
12672
      // Expands a sequential vector reduction: starting from the accumulator
      // (operand 0), each element extracted from the vector (operand 1) is
      // folded in turn into the running value using the reduction's base
      // opcode. The node's flags are applied to every generated operation.
      // NOTE(review): embedded listing lines 12673 (signature), 12683 (the
      // statement guarded by the scalable-vector check) and 12688 (the
      // declaration of Ops) are absent from this listing.
12674 SDLoc dl(Node);
12675 SDValue AccOp = Node->getOperand(0);
12676 SDValue VecOp = Node->getOperand(1);
12677 SDNodeFlags Flags = Node->getFlags();
12678
12679 EVT VT = VecOp.getValueType();
12680 EVT EltVT = VT.getVectorElementType();
12681
12682 if (VT.isScalableVector())
12684 "Expanding reductions for scalable vectors is undefined.");
12685
12686 unsigned NumElts = VT.getVectorNumElements();
12687
12689 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12690
12691 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12692
      // Chain the scalar operations so each one consumes the previous result.
12693 SDValue Res = AccOp;
12694 for (unsigned i = 0; i < NumElts; i++)
12695 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12696
12697 return Res;
12698}
12699
12701 SelectionDAG &DAG) const {
      // Expands a remainder node (signed when the opcode is SREM, unsigned
      // otherwise). On success the replacement value is stored in Result and
      // true is returned; false is returned when neither the combined
      // divide/remainder opcode nor the plain divide opcode is legal or custom
      // for the node's type.
      // NOTE(review): embedded listing line 12700 (start of the signature) is
      // absent from this listing.
12702 EVT VT = Node->getValueType(0);
12703 SDLoc dl(Node);
12704 bool isSigned = Node->getOpcode() == ISD::SREM;
12705 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12706 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12707 SDValue Dividend = Node->getOperand(0);
12708 SDValue Divisor = Node->getOperand(1);
      // Preferred: a single divide/remainder node, of which result 1 is used as
      // the remainder.
12709 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12710 SDVTList VTs = DAG.getVTList(VT, VT);
12711 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12712 return true;
12713 }
12714 if (isOperationLegalOrCustom(DivOpc, VT)) {
12715 // X % Y -> X-X/Y*Y
12716 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12717 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12718 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12719 return true;
12720 }
12721 return false;
12722}
12723
12725 SelectionDAG &DAG) const {
      // Expands a saturating floating-point to integer conversion (signed when
      // the opcode is FP_TO_SINT_SAT, unsigned otherwise). Operand 0 is the
      // floating-point source and operand 1 carries the type whose width
      // defines the saturation range. Out-of-range inputs are clamped to the
      // range's minimum or maximum and NaN inputs yield 0.
      // NOTE(review): embedded listing line 12724 (start of the signature) is
      // absent from this listing.
12726 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12727 SDLoc dl(SDValue(Node, 0));
12728 SDValue Src = Node->getOperand(0);
12729
12730 // DstVT is the result type, while SatVT is the size to which we saturate
12731 EVT SrcVT = Src.getValueType();
12732 EVT DstVT = Node->getValueType(0);
12733
12734 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12735 unsigned SatWidth = SatVT.getScalarSizeInBits();
12736 unsigned DstWidth = DstVT.getScalarSizeInBits();
12737 assert(SatWidth <= DstWidth &&
12738 "Expected saturation width smaller than result width");
12739
12740 // Determine minimum and maximum integer values and their corresponding
12741 // floating-point values.
12742 APInt MinInt, MaxInt;
12743 if (IsSigned) {
12744 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12745 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12746 } else {
12747 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12748 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12749 }
12750
12751 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12752 // libcall emission cannot handle this. Large result types will fail.
12753 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12754 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12755 SrcVT = Src.getValueType();
12756 }
12757
      // Convert the integer bounds to the source floating-point format,
      // rounding towards zero, and record whether both conversions were exact.
12758 const fltSemantics &Sem = SrcVT.getFltSemantics();
12759 APFloat MinFloat(Sem);
12760 APFloat MaxFloat(Sem);
12761
12762 APFloat::opStatus MinStatus =
12763 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12764 APFloat::opStatus MaxStatus =
12765 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12766 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12767 !(MaxStatus & APFloat::opStatus::opInexact);
12768
12769 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12770 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12771
12772 // If the integer bounds are exactly representable as floats and min/max are
12773 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12774 // of comparisons and selects.
      // EmitMinMax returns an empty SDValue when either opcode is not legal or
      // custom for SrcVT, so the caller can try the next pair of opcodes.
12775 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12776 bool MayPropagateNaN) {
12777 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12778 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12779 if (!MinMaxLegal)
12780 return SDValue();
12781
12782 SDValue Clamped = Src;
12783
12784 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12785 // then the result is MinFloat.
12786 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12787 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12788 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12789 // Convert clamped value to integer.
12790 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12791 dl, DstVT, Clamped);
12792
12793 // If !MayPropagateNaN and the conversion is unsigned we're done,
12794 // because we mapped NaN to MinFloat, which will cast to zero.
12795 if (!MayPropagateNaN && !IsSigned)
12796 return FpToInt;
12797
12798 // Otherwise, select 0 if Src is NaN.
12799 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12800 EVT SetCCVT =
12801 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12802 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12803 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12804 };
12805 if (AreExactFloatBounds) {
12806 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12807 /*MayPropagateNaN=*/false))
12808 return Res;
12809 // These may propagate NaN for sNaN operands.
12810 if (SDValue Res =
12811 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12812 return Res;
12813 // These always propagate NaN.
12814 if (SDValue Res =
12815 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12816 return Res;
12817 }
12818
      // Fallback: convert directly and then select the saturated bounds with
      // explicit comparisons against the floating-point limits.
12819 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12820 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12821
12822 // Result of direct conversion. The assumption here is that the operation is
12823 // non-trapping and it's fine to apply it to an out-of-range value if we
12824 // select it away later.
12825 SDValue FpToInt =
12826 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12827
12828 SDValue Select = FpToInt;
12829
12830 EVT SetCCVT =
12831 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12832
12833 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12834 // MinInt if Src is NaN.
12835 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12836 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12837 // If Src OGT MaxFloat, select MaxInt.
12838 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12839 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12840
12841 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12842 // is already zero.
12843 if (!IsSigned)
12844 return Select;
12845
12846 // Otherwise, select 0 if Src is NaN.
12847 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12848 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12849 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12850}
12851
12853 const SDLoc &dl,
12854 SelectionDAG &DAG) const {
      // Converts Op to the floating-point type ResultVT using round-to-odd:
      // the converted value is kept when the conversion was exact, when Op is
      // NaN, or when the converted bit pattern is already odd; otherwise the
      // bit pattern is adjusted by one to the neighbouring odd value. Op is
      // returned unchanged when its scalar type already equals that of
      // ResultVT.
      // NOTE(review): embedded listing line 12852 (start of the signature) is
      // absent from this listing.
12855 EVT OperandVT = Op.getValueType();
12856 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12857 return Op;
12858 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12859 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12860 // can induce double-rounding which may alter the results. We can
12861 // correct for this using a trick explained in: Boldo, Sylvie, and
12862 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12863 // World Congress. 2005.
      // Narrow, then widen again so the narrowed value can be compared with the
      // original in the operand type.
12864 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
12865 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12866
12867 // We can keep the narrow value as-is if narrowing was exact (no
12868 // rounding error), the wide value was NaN (the narrow value is also
12869 // NaN and should be preserved) or if we rounded to the odd value.
12870 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12871 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12872 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
12873 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12874 EVT ResultIntVTCCVT = getSetCCResultType(
12875 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12876 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12877 // The result is already odd so we don't need to do anything.
12878 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12879
12880 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12881 Op.getValueType());
12882 // We keep results which are exact, odd or NaN.
      // SETUEQ is true both when the values are equal (exact) and when the
      // comparison is unordered (NaN).
12883 SDValue KeepNarrow =
12884 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12885 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12886 // We morally performed a round-down if AbsNarrow is smaller than
12887 // AbsWide.
12888 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12889 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12890 SDValue NarrowIsRd =
12891 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12892 // If the narrow value is odd or exact, pick it.
12893 // Otherwise, narrow is even and corresponds to either the rounded-up
12894 // or rounded-down value. If narrow is the rounded-down value, we want
12895 // the rounded-up value as it will be odd.
      // The adjustment is applied to the integer bit pattern: +1 when narrow
      // was rounded down in magnitude, -1 otherwise.
12896 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12897 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12898 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12899 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12900}
12901
// Expands an ISD::FP_ROUND node (the opcode is asserted below) whose scalar
// result type is bf16. Any other result type yields an empty SDValue.
// NOTE(review): the signature line (listing line 12902) is absent from this
// extract; presumably this is TargetLowering::expandFP_ROUND(SDNode *Node,
// SelectionDAG &DAG) const -- confirm against the real file.
12903 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12904 SDValue Op = Node->getOperand(0);
12905 EVT VT = Node->getValueType(0);
12906 SDLoc dl(Node);
12907 if (VT.getScalarType() == MVT::bf16) {
// When constant operand 1 is 1 the node is turned directly into FP_TO_BF16 and
// the manual rounding sequence below is skipped.
12908 if (Node->getConstantOperandVal(1) == 1) {
12909 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12910 }
12911 EVT OperandVT = Op.getValueType();
// Comparing Op with itself using SETUO (unordered) is true only for NaN. This
// is computed on the original wide operand, before Op is overwritten below.
12912 SDValue IsNaN = DAG.getSetCC(
12913 dl,
12914 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12915 Op, Op, ISD::SETUO);
12916
12917 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12918 // can induce double-rounding which may alter the results. We can
12919 // correct for this using a trick explained in: Boldo, Sylvie, and
12920 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12921 // World Congress. 2005.
// F32/I32 keep VT's vector shape (if any) and only swap the element type.
12922 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12923 EVT I32 = F32.changeTypeToInteger();
12924 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12925 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12926
12927 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12928 // turning into infinities.
// 0x400000 is bit 22 of the 32-bit pattern; it survives the later shift by 16.
12929 SDValue NaN =
12930 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12931
12932 // Factor in the contribution of the low 16 bits.
// Bias = 0x7fff + (bit 16 of Op). Adding it before discarding the low 16 bits
// rounds to nearest, with ties going to the value whose bit 16 is even.
12933 SDValue One = DAG.getConstant(1, dl, I32);
12934 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12935 DAG.getShiftAmountConstant(16, I32, dl));
12936 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12937 SDValue RoundingBias =
12938 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
12939 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12940
12941 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12942 // 0x80000000.
12943 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12944
12945 // Now that we have rounded, shift the bits into position.
// The upper 16 bits of the i32 become the i16 pattern that is bitcast to VT.
12946 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12947 DAG.getShiftAmountConstant(16, I32, dl));
12948 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12949 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12950 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12951 }
// Not a bf16 result: nothing is expanded here.
12952 return SDValue();
12953}
12954
// Expands VECTOR_SPLICE_LEFT / VECTOR_SPLICE_RIGHT (asserted below) through a
// stack temporary: V1 and V2 are stored back to back and the result is loaded
// from a byte offset derived from operand 2.
// NOTE(review): listing lines 12955 (start of the signature), 12983 (start of
// the MemVT definition) and 13015 (tail of the final getLoad call) are absent
// from this extract; presumably this is TargetLowering::expandVectorSplice.
12956 SelectionDAG &DAG) const {
12957 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12958 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12959 "Unexpected opcode!");
12960 assert((Node->getValueType(0).isScalableVector() ||
12961 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12962 "Fixed length vector types with constant offsets expected to use "
12963 "SHUFFLE_VECTOR!");
12964
12965 EVT VT = Node->getValueType(0);
12966 SDValue V1 = Node->getOperand(0);
12967 SDValue V2 = Node->getOperand(1);
12968 SDValue Offset = Node->getOperand(2);
12969 SDLoc DL(Node);
12970
12971 // Expand through memory thusly:
12972 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12973 // Store V1, Ptr
12974 // Store V2, Ptr + sizeof(V1)
12975 // if (VECTOR_SPLICE_LEFT)
12976 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12977 // else
12978 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12979 // Res = Load Ptr
12980
12981 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12982
// The visible argument doubles VT's element count, so the temporary is sized
// to hold both input vectors.
12984 VT.getVectorElementCount() * 2);
12985 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12986 EVT PtrVT = StackPtr.getValueType();
12987 auto &MF = DAG.getMachineFunction();
12988 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12989 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12990
12991 // Store the lo part of CONCAT_VECTORS(V1, V2)
12992 SDValue StoreV1 =
12993 DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo, Alignment);
12994 // Store the hi part of CONCAT_VECTORS(V1, V2)
// VTBytes is the store size of one input vector, built with getTypeSize from
// VT.getStoreSize(). StoreV2 is chained after StoreV1, so the final load
// (chained on StoreV2) is ordered after both stores.
12995 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12996 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12997 SDValue StoreV2 =
12998 DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo, Alignment);
12999
13000 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
13001 SDValue EltByteSize =
13002 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
13003 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
13004 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
13005
// Clamp the byte offset to one vector's worth so a VT-sized load starting at
// the adjusted pointer stays inside the two-vector temporary.
13006 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
13007
// LEFT moves forward from the start of V1; RIGHT moves backward from the start
// of V2.
13008 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
13009 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
13010 else
13011 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
13012
13013 // Load the spliced result
13014 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
13016}
13017
// Expands a masked compress of a fixed-length vector through a stack
// temporary. Operands: 0 = input vector, 1 = mask, 2 = passthru. Elements are
// written one by one; the write position only advances for selected lanes, so
// selected elements end up packed at the front. Scalable vectors are rejected
// with a fatal error.
// NOTE(review): listing lines 13018 (start of the signature), 13038, 13068,
// 13071, 13079, 13089 and 13114 are absent from this extract; presumably this
// is TargetLowering::expandVECTOR_COMPRESS.
13019 SelectionDAG &DAG) const {
13020 SDLoc DL(Node);
13021 SDValue Vec = Node->getOperand(0);
13022 SDValue Mask = Node->getOperand(1);
13023 SDValue Passthru = Node->getOperand(2);
13024
13025 EVT VecVT = Vec.getValueType();
13026 EVT ScalarVT = VecVT.getScalarType();
13027 EVT MaskVT = Mask.getValueType();
13028 EVT MaskScalarVT = MaskVT.getScalarType();
13029
13030 // Needs to be handled by targets that have scalable vector types.
13031 if (VecVT.isScalableVector())
13032 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
13033
13034 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13035 SDValue StackPtr = DAG.CreateStackTemporary(VecVT.getStoreSize(), Alignment);
13036 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13037 MachinePointerInfo PtrInfo =
13039
// OutPos is the running index of the next output slot; Chain threads all the
// stores and loads below so they are ordered.
13040 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13041 SDValue Chain = DAG.getEntryNode();
13042 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13043
13044 bool HasPassthru = !Passthru.isUndef();
13045
13046 // If we have a passthru vector, store it on the stack, overwrite the matching
13047 // positions and then re-write the last element that was potentially
13048 // overwritten even though mask[i] = false.
13049 if (HasPassthru)
13050 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo, Alignment);
13051
13052 SDValue LastWriteVal;
13053 APInt PassthruSplatVal;
13054 bool IsSplatPassthru =
13055 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13056
13057 if (IsSplatPassthru) {
13058 // As we do not know which position we wrote to last, we cannot simply
13059 // access that index from the passthru vector. So we first check if passthru
13060 // is a splat vector, to use any element ...
13061 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13062 } else if (HasPassthru) {
13063 // ... if it is not a splat vector, we need to get the passthru value at
13064 // position = popcount(mask) and re-load it from the stack before it is
13065 // overwritten in the loop below.
// NOTE(review): the opcodes of the two getNode calls below sit on the missing
// lines 13068/13071; from the result types they appear to narrow the mask to
// i1 elements and then widen to PopcountVT before the add-reduction -- confirm.
13066 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13067 SDValue Popcount = DAG.getNode(
13069 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13070 Popcount = DAG.getNode(
13072 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13073 Popcount);
13074 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13075 SDValue LastElmtPtr =
13076 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13077 LastWriteVal = DAG.getLoad(
13078 ScalarVT, DL, Chain, LastElmtPtr,
13080 Chain = LastWriteVal.getValue(1);
13081 }
13082
13083 unsigned NumElms = VecVT.getVectorNumElements();
13084 for (unsigned I = 0; I < NumElms; I++) {
// Every element is stored unconditionally at OutPos. If its mask bit is clear,
// OutPos does not advance and the next iteration overwrites the same slot.
13085 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13086 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13087 Chain = DAG.getStore(
13088 Chain, DL, ValI, OutPtr,
13090
13091 // Get the mask value and add it to the current output position. This
13092 // either increments by 1 if MaskI is true or adds 0 otherwise.
13093 // Freeze in case we have poison/undef mask entries.
13094 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13095 MaskI = DAG.getFreeze(MaskI);
13096 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13097 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13098 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13099
// Final iteration with a passthru: the slot at OutPos may have been clobbered
// by an unselected element, so it is rewritten. OutPos is clamped to the last
// valid index first so the fix-up store stays inside the temporary.
13100 if (HasPassthru && I == NumElms - 1) {
13101 SDValue EndOfVector =
13102 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13103 SDValue AllLanesSelected =
13104 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
13105 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13106 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13107
13108 // Re-write the last ValI if all lanes were selected. Otherwise,
13109 // overwrite the last write it with the passthru value.
13110 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13111 LastWriteVal, SDNodeFlags::Unpredictable);
13112 Chain = DAG.getStore(
13113 Chain, DL, LastWriteVal, OutPtr,
13115 }
13116 }
13117
// Reload the whole temporary as the compressed vector, ordered after every
// store through Chain.
13118 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo, Alignment);
13119}
13120
// Expands a count-trailing-zero-elements node (CTTZ_ELTS or
// CTTZ_ELTS_ZERO_POISON) on a mask vector. With a legal step vector the result
// is VL - max(mask ? VL - step : 0); without one the mask is split in half and
// the two partial results are recombined.
// NOTE(review): listing lines 13121 (signature), 13147 (tail of the ADD call),
// 13155 and 13164 (start of the Max definition) are absent from this extract;
// presumably this is TargetLowering::expandCttzElts.
13122 SDLoc DL(Node);
13123 EVT VT = Node->getValueType(0);
13124
13125 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13126 auto [Mask, StepVec] =
13127 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13128
13129 // No legal step vector: split mask in half and recombine results.
13130 // LoNumElts uses the non-poison CTTZ_ELTS so its result is well-defined
13131 // (== LoNumElts when no active lane), allowing the SETNE comparison.
13132 // Result: (ResLo != LoNumElts) ? ResLo : (LoNumElts + ResHi)
13133 if (!StepVec) {
13134 EVT ResVT = Node->getValueType(0);
13135 auto [MaskLo, MaskHi] = DAG.SplitVector(Node->getOperand(0), DL);
13136 SDValue LoNumElts = DAG.getElementCount(
13137 DL, ResVT, MaskLo.getValueType().getVectorElementCount());
// The high half reuses the node's own opcode, so poison-on-zero semantics (if
// requested) only apply when the low half had no active lane.
13138 SDValue ResLo = DAG.getNode(ISD::CTTZ_ELTS, DL, ResVT, MaskLo);
13139 SDValue ResHi = DAG.getNode(Node->getOpcode(), DL, ResVT, MaskHi);
13140 SDValue ResLoNotNumElts = DAG.getSetCC(
13141 DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResVT),
13142 ResLo, LoNumElts, ISD::SETNE);
13143 // Per LangRef, ResVT must be wide enough to hold the total element count,
13144 // so the sum cannot wrap as an unsigned add. NSW is not guaranteed since
13145 // the count is only required to fit unsigned.
13146 SDValue Sum = DAG.getNode(ISD::ADD, DL, ResVT, LoNumElts, ResHi,
13148 return DAG.getSelect(DL, ResVT, ResLoNotNumElts, ResLo, Sum);
13149 }
13150
13151 EVT StepVecVT = StepVec.getValueType();
13152 EVT StepVT = StepVecVT.getVectorElementType();
13153
13154 // Promote the scalar result type early to avoid redundant zexts.
13156 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13157
// Build (VL - step) per lane and zero out inactive lanes. Assuming StepVec is
// 0,1,2,... (TODO confirm against getLegalMaskAndStepVector), the first active
// lane i carries the largest value VL - i, so VL - max == i; with no active
// lane max == 0 and the result is VL.
13158 SDValue VL =
13159 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13160 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
13161 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13162 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13163 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
// NOTE(review): the line defining Max (a reduction over Select, presumably an
// unsigned max) is missing from this extract.
13165 StepVecVT.getVectorElementType(), Select);
13166 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13167 DAG.getZExtOrTrunc(Max, DL, StepVT));
13168
// Convert from the (possibly promoted) step type to the node's result type.
13169 return DAG.getZExtOrTrunc(Sub, DL, VT);
13170}
13171
// Expands a partial-reduction multiply-accumulate node. Operands:
// 0 = accumulator, 1 and 2 = the multiplicands. The (optionally extended)
// product is cut into accumulator-sized subvectors which are then summed
// together with the accumulator.
// NOTE(review): listing lines 13172 (start of the signature), 13182 (start of
// the ExtMulOpVT definition) and the three `case` labels at 13189/13192/13195
// are absent from this extract; presumably this is
// TargetLowering::expandPartialReduceMLA.
13173 SelectionDAG &DAG) const {
13174 SDLoc DL(N);
13175 SDValue Acc = N->getOperand(0);
13176 SDValue MulLHS = N->getOperand(1);
13177 SDValue MulRHS = N->getOperand(2);
13178 EVT AccVT = Acc.getValueType();
13179 EVT MulOpVT = MulLHS.getValueType();
13180
// ExtMulOpVT keeps the multiplicands' element count; its element type is set on
// the missing line (presumably the accumulator's element type -- confirm).
13181 EVT ExtMulOpVT =
13183 MulOpVT.getVectorElementCount());
13184
// Pick the extension matching the opcode: zero-, sign- or FP-extend. Both
// sides always receive the same extension in the visible cases.
13185 unsigned ExtOpcLHS, ExtOpcRHS;
13186 switch (N->getOpcode()) {
13187 default:
13188 llvm_unreachable("Unexpected opcode");
13190 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13191 break;
13193 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13194 break;
13196 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13197 break;
13198 }
13199
13200 if (ExtMulOpVT != MulOpVT) {
13201 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13202 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13203 }
// Skip the multiply entirely when the right-hand side is (a splat of) one.
13204 SDValue Input = MulLHS;
13205 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13206 if (!llvm::isOneOrOneSplatFP(MulRHS))
13207 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13208 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13209 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13210 }
13211
// ScaleFactor = how many accumulator-sized chunks fit in the product (integer
// division of the minimum element counts).
13212 unsigned Stride = AccVT.getVectorMinNumElements();
13213 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13214
13215 // Collect all of the subvectors
13216 std::deque<SDValue> Subvectors = {Acc};
13217 for (unsigned I = 0; I < ScaleFactor; I++)
13218 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13219
13220 unsigned FlatNode =
13221 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13222
13223 // Flatten the subvector tree
// Queue-based pairwise reduction: add the two front entries, append the sum to
// the back, and drop the two consumed entries until a single value remains.
13224 while (Subvectors.size() > 1) {
13225 Subvectors.push_back(
13226 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13227 Subvectors.pop_front();
13228 Subvectors.pop_front();
13229 }
13230
13231 assert(Subvectors.size() == 1 &&
13232 "There should only be one subvector after tree flattening");
13233
13234 return Subvectors[0];
13235}
13236
13237/// Given a store node \p StoreNode, return true if it is safe to fold that node
13238/// into \p FPNode, which expands to a library call with output pointers.
// The check walks the operand graph of StoreNode (excluding its direct use of
// FPNode) and rejects the fold if the walk reaches FPNode, reaches a
// CALLSEQ_START, or exceeds the step budget.
// NOTE(review): listing lines 13239 (start of the signature), 13241 (Worklist
// declaration), 13243 (Visited declaration) and 13250 (MaxSteps definition)
// are absent from this extract.
13240 SDNode *FPNode) {
13242 SmallVector<const SDNode *, 8> DeferredNodes;
13244
13245 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13246 for (SDValue Op : StoreNode->ops())
13247 if (Op.getNode() != FPNode)
13248 Worklist.push_back(Op.getNode());
13249
// Depth-first walk (pop from the back) over operands; Visited de-duplicates.
13251 while (!Worklist.empty()) {
13252 const SDNode *Node = Worklist.pop_back_val();
13253 auto [_, Inserted] = Visited.insert(Node);
13254 if (!Inserted)
13255 continue;
13256
// Give up conservatively once the visited set reaches the budget; a MaxSteps
// of 0 disables the limit.
13257 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13258 return false;
13259
13260 // Reached the FPNode (would result in a cycle).
13261 // OR Reached CALLSEQ_START (would result in nested call sequences).
13262 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13263 return false;
13264
13265 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13266 // Defer looking into call sequences (so we can check we're outside one).
13267 // We still need to look through these for the predecessor check.
13268 DeferredNodes.push_back(Node);
13269 continue;
13270 }
13271
13272 for (SDValue Op : Node->ops())
13273 Worklist.push_back(Op.getNode());
13274 }
13275
13276 // True if we're outside a call sequence and don't have the FPNode as a
13277 // predecessor. No cycles or nested call sequences possible.
// DeferredNodes (the skipped CALLSEQ_END nodes) seed the helper's worklist so
// the predecessor search continues through the call sequences.
13278 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13279 MaxSteps);
13280}
13281
// Expands a multi-result FP node into a single library call that returns at
// most one result directly (CallRetResNo) and writes the others through output
// pointers. Existing stores of the node's results are reused as the output
// pointers where that is safe; otherwise stack temporaries are created.
// Returns false if no usable libcall implementation exists, true after
// appending one value per node result to Results.
// NOTE(review): listing lines 13282 (start of the signature), 13284, 13302,
// 13324, 13341 (PointerTy definition), 13353, 13366 (CLI declaration) and
// 13387 are absent from this extract; presumably this is
// TargetLowering::expandMultipleResultFPLibCall.
13283 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13285 std::optional<unsigned> CallRetResNo) const {
13286 if (LC == RTLIB::UNKNOWN_LIBCALL)
13287 return false;
13288
13289 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
13290 if (LibcallImpl == RTLIB::Unsupported)
13291 return false;
13292
13293 LLVMContext &Ctx = *DAG.getContext();
13294 EVT VT = Node->getValueType(0);
13295 unsigned NumResults = Node->getNumValues();
13296
13297 // Find users of the node that store the results (and share input chains). The
13298 // destination pointers can be used instead of creating stack allocations.
// ResultStores is indexed by result number; entries stay null when no suitable
// store was found for that result.
13299 SDValue StoresInChain;
13300 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13301 for (SDNode *User : Node->users()) {
// NOTE(review): the condition guarding this `continue` is on a missing line;
// given the cast below it presumably filters out users that are not stores.
13303 continue;
13304 auto *ST = cast<StoreSDNode>(User);
13305 SDValue StoreValue = ST->getValue();
13306 unsigned ResNo = StoreValue.getResNo();
13307 // Ensure the store corresponds to an output pointer.
13308 if (CallRetResNo == ResNo)
13309 continue;
13310 // Ensure the store to the default address space and not atomic or volatile.
13311 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13312 continue;
13313 // Ensure all store chains are the same (so they don't alias).
13314 if (StoresInChain && ST->getChain() != StoresInChain)
13315 continue;
13316 // Ensure the store is properly aligned.
13317 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
13318 if (ST->getAlign() <
13319 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
13320 continue;
13321 // Avoid:
13322 // 1. Creating cyclic dependencies.
13323 // 2. Expanding the node to a call within a call sequence.
13325 continue;
13326 ResultStores[ResNo] = ST;
13327 StoresInChain = ST->getChain();
13328 }
13329
13330 ArgListTy Args;
13331
13332 // Pass the arguments.
13333 for (const SDValue &Op : Node->op_values()) {
13334 EVT ArgVT = Op.getValueType();
13335 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
13336 Args.emplace_back(Op, ArgTy);
13337 }
13338
13339 // Pass the output pointers.
// One pointer argument per result except the one returned by value; the entry
// for CallRetResNo in ResultPtrs is left empty.
13340 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13342 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
13343 if (ResNo == CallRetResNo)
13344 continue;
13345 EVT ResVT = Node->getValueType(ResNo);
13346 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
13347 ResultPtrs[ResNo] = ResultPtr;
13348 Args.emplace_back(ResultPtr, PointerTy);
13349 }
13350
13351 SDLoc DL(Node);
13352
13354 // Pass the vector mask (if required).
// The mask is an all-true constant of the setcc result type for VT.
13355 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
13356 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
13357 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
13358 }
13359
// The call returns void unless one result is returned by value. It is chained
// on the folded stores' input chain, or on the entry node if none was folded.
13360 Type *RetType = CallRetResNo.has_value()
13361 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
13362 : Type::getVoidTy(Ctx);
13363 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13364 SDValue Callee =
13365 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
13367 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13368 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
13369
13370 auto [Call, CallChain] = LowerCallTo(CLI);
13371
// Produce results in result-number order: the direct return value for
// CallRetResNo, and a load from the output pointer for everything else.
13372 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
13373 if (ResNo == CallRetResNo) {
13374 Results.push_back(Call);
13375 continue;
13376 }
// NOTE(review): PtrInfo is still default-constructed when getLoad consumes it;
// the assignments in the if/else below happen afterwards and no later visible
// line reads PtrInfo, so the load appears to carry no pointer info. Confirm
// whether the load was meant to be created after PtrInfo is chosen.
13377 MachinePointerInfo PtrInfo;
13378 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
13379 ResultPtr, PtrInfo);
13380 SDValue OutChain = LoadResult.getValue(1);
13381
13382 if (StoreSDNode *ST = ResultStores[ResNo]) {
13383 // Replace store with the library call.
13384 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
13385 PtrInfo = ST->getPointerInfo();
13386 } else {
13388 DAG.getMachineFunction(),
13389 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
13390 }
13391
13392 Results.push_back(LoadResult);
13393 }
13394
13395 return true;
13396}
13397
// Rewrites a comparison (LHS CC RHS) whose condition code is not legal for the
// operand type into an equivalent legal form. In/out parameters:
//   LHS, RHS, CC - updated in place. When the comparison is replaced by a
//                  finished boolean value, that value is left in LHS and RHS/CC
//                  are cleared to empty SDValues.
//   NeedInvert   - set when the caller must invert the produced result.
//   Chain        - if non-null, replaced by a TokenFactor of the two new
//                  compares' chains.
//   Mask, EVL    - both set for the VP (vector-predicated) form, both unset
//                  otherwise (asserted).
// Returns true when something was rewritten, false when the condition code
// needed no change.
// NOTE(review): listing lines 13398 (start of the signature, which also
// declares DAG and VT), 13412, 13415-13416 (the `case` labels and the first
// InvCC definition), 13480 (CC1/CC2 declarations), 13492 and 13497 (assert
// openings) are absent from this extract; presumably this is
// TargetLowering::LegalizeSetCCCondCode.
13399 SDValue &LHS, SDValue &RHS,
13400 SDValue &CC, SDValue Mask,
13401 SDValue EVL, bool &NeedInvert,
13402 const SDLoc &dl, SDValue &Chain,
13403 bool IsSignaling) const {
13404 MVT OpVT = LHS.getSimpleValueType();
13405 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
13406 NeedInvert = false;
13407 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13408 bool IsNonVP = !EVL;
13409 switch (getCondCodeAction(CCCode, OpVT)) {
13410 default:
13411 llvm_unreachable("Unknown condition code action!");
13413 // Nothing to do.
13414 break;
// Strategy 1: swap the operands and use the mirrored condition code.
13417 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13418 std::swap(LHS, RHS);
13419 CC = DAG.getCondCode(InvCC);
13420 return true;
13421 }
13422 // Swapping operands didn't work. Try inverting the condition.
13423 bool NeedSwap = false;
13424 InvCC = getSetCCInverse(CCCode, OpVT);
13425 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
13426 // If inverting the condition is not enough, try swapping operands
13427 // on top of it.
13428 InvCC = ISD::getSetCCSwappedOperands(InvCC);
13429 NeedSwap = true;
13430 }
13431 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13432 CC = DAG.getCondCode(InvCC);
13433 NeedInvert = true;
13434 if (NeedSwap)
13435 std::swap(LHS, RHS);
13436 return true;
13437 }
13438
13439 // Special case: expand i1 comparisons using logical operations.
// For i1 the signed interpretation of 1 is -1, which is why the signed and
// unsigned cases pair up in mirrored fashion below.
13440 if (OpVT == MVT::i1) {
13441 SDValue Ret;
13442 switch (CCCode) {
13443 default:
13444 llvm_unreachable("Unknown integer setcc!");
13445 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
13446 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
13447 MVT::i1);
13448 break;
13449 case ISD::SETNE: // X != Y --> (X ^ Y)
13450 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
13451 break;
13452 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13453 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13454 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
13455 DAG.getNOT(dl, LHS, MVT::i1));
13456 break;
13457 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13458 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13459 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
13460 DAG.getNOT(dl, RHS, MVT::i1));
13461 break;
13462 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13463 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13464 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
13465 DAG.getNOT(dl, LHS, MVT::i1));
13466 break;
13467 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13468 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13469 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
13470 DAG.getNOT(dl, RHS, MVT::i1));
13471 break;
13472 }
13473
// The logical result replaces the whole comparison: it is returned in LHS,
// converted to VT, with RHS and CC cleared.
13474 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
13475 RHS = SDValue();
13476 CC = SDValue();
13477 return true;
13478 }
13479
// Strategy 3: split the comparison into two compares (CC1, CC2) combined with
// Opc (AND or OR). The switch below picks the pair.
13481 unsigned Opc = 0;
13482 switch (CCCode) {
13483 default:
13484 llvm_unreachable("Don't know how to expand this condition!");
13485 case ISD::SETUO:
// Unordered: (LHS une LHS) | (RHS une RHS) when SETUNE is legal; otherwise fall
// through to the SETO expansion and ask the caller to invert it.
13486 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
13487 CC1 = ISD::SETUNE;
13488 CC2 = ISD::SETUNE;
13489 Opc = ISD::OR;
13490 break;
13491 }
13493 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
13494 NeedInvert = true;
13495 [[fallthrough]];
13496 case ISD::SETO:
// Ordered: (LHS oeq LHS) & (RHS oeq RHS).
13498 "If SETO is expanded, SETOEQ must be legal!");
13499 CC1 = ISD::SETOEQ;
13500 CC2 = ISD::SETOEQ;
13501 Opc = ISD::AND;
13502 break;
13503 case ISD::SETONE:
13504 case ISD::SETUEQ:
13505 // If the SETUO or SETO CC isn't legal, we might be able to use
13506 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
13507 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
13508 // the operands.
// Bit 0x8 of the condition code distinguishes the unordered variants here
// (set for SETUEQ, clear for SETONE).
13509 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13510 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
13511 isCondCodeLegal(ISD::SETOLT, OpVT))) {
13512 CC1 = ISD::SETOGT;
13513 CC2 = ISD::SETOLT;
13514 Opc = ISD::OR;
13515 NeedInvert = ((unsigned)CCCode & 0x8U);
13516 break;
13517 }
13518 [[fallthrough]];
13519 case ISD::SETOEQ:
13520 case ISD::SETOGT:
13521 case ISD::SETOGE:
13522 case ISD::SETOLT:
13523 case ISD::SETOLE:
13524 case ISD::SETUNE:
13525 case ISD::SETUGT:
13526 case ISD::SETUGE:
13527 case ISD::SETULT:
13528 case ISD::SETULE:
13529 // If we are floating point, assign and break, otherwise fall through.
13530 if (!OpVT.isInteger()) {
13531 // We can use the 4th bit to tell if we are the unordered
13532 // or ordered version of the opcode.
// CC1 keeps the low three bits of the code and sets bit 0x10; CC2 supplies the
// ordered/unordered test that is AND-ed (ordered) or OR-ed (unordered) in.
13533 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13534 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
13535 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
13536 break;
13537 }
13538 // Fallthrough if we are unsigned integer.
13539 [[fallthrough]];
13540 case ISD::SETLE:
13541 case ISD::SETGT:
13542 case ISD::SETGE:
13543 case ISD::SETLT:
13544 case ISD::SETNE:
13545 case ISD::SETEQ:
13546 // If all combinations of inverting the condition and swapping operands
13547 // didn't work then we have no means to expand the condition.
13548 llvm_unreachable("Don't know how to expand this condition!");
13549 }
13550
13551 SDValue SetCC1, SetCC2;
13552 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
13553 // If we aren't the ordered or unorder operation,
13554 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
13555 if (IsNonVP) {
13556 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
13557 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
13558 } else {
13559 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
13560 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
13561 }
13562 } else {
13563 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
13564 if (IsNonVP) {
13565 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
13566 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
13567 } else {
13568 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
13569 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
13570 }
13571 }
// With an incoming chain, merge the chain results (value 1) of both compares
// so later users depend on both.
13572 if (Chain)
13573 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
13574 SetCC2.getValue(1));
13575 if (IsNonVP)
13576 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
13577 else {
13578 // Transform the binary opcode to the VP equivalent.
13579 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
13580 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
13581 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
13582 }
// The combined value is the final result; signal that by clearing RHS and CC.
13583 RHS = SDValue();
13584 CC = SDValue();
13585 return true;
13586 }
13587 }
// Reached only via the `break` of the "nothing to do" case above.
13588 return false;
13589}
13590
// Expands a vector operation by splitting every operand into low/high halves,
// applying the same opcode (with the node's flags) to each half, and
// concatenating the two results. Returns an empty SDValue when splitting is
// not possible or not considered worthwhile.
// NOTE(review): listing line 13591 (start of the signature) is absent from
// this extract; presumably this is
// TargetLowering::expandVectorNaryOpBySplitting.
13592 SelectionDAG &DAG) const {
13593 EVT VT = Node->getValueType(0);
13594 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13595 // split into two equal parts.
13596 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13597 return SDValue();
13598
13599 // Restrict expansion to cases where both parts can be concatenated.
13600 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13601 if (LoVT != HiVT || !isTypeLegal(LoVT))
13602 return SDValue();
13603
13604 SDLoc DL(Node);
13605 unsigned Opcode = Node->getOpcode();
13606
13607 // Don't expand if the result is likely to be unrolled anyway.
13608 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13609 return SDValue();
13610
// Every operand is split with the result's half types, so this assumes all
// operands have the same vector type as the result -- TODO confirm for callers.
13611 SmallVector<SDValue, 4> LoOps, HiOps;
13612 for (const SDValue &V : Node->op_values()) {
13613 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13614 LoOps.push_back(Lo);
13615 HiOps.push_back(Hi);
13616 }
13617
13618 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
13619 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
13620 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13621}
13622
// Replaces "extract element EltNo from a loaded vector" with a scalar load of
// just that element. Returns the new value converted to ResultVT, or an empty
// SDValue when the transformation is not possible or not profitable.
// NOTE(review): listing lines 13623 (start of the signature, which declares
// ResultVT), 13645 (MPI declaration), 13670 (start of the NewPtr definition)
// and 13682 are absent from this extract; presumably this is
// TargetLowering::scalarizeExtractedVectorLoad.
13624 const SDLoc &DL,
13625 EVT InVecVT, SDValue EltNo,
13626 LoadSDNode *OriginalLoad,
13627 SelectionDAG &DAG) const {
13628 assert(OriginalLoad->isSimple());
13629
13630 EVT VecEltVT = InVecVT.getVectorElementType();
13631
13632 // If the vector element type is not a multiple of a byte then we are unable
13633 // to correctly compute an address to load only the extracted element as a
13634 // scalar.
13635 if (!VecEltVT.isByteSized())
13636 return SDValue();
13637
// ExtTy is only passed to the shouldReduceLoadWidth query below; the extension
// kind actually emitted is chosen later.
13638 ISD::LoadExtType ExtTy =
13639 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
13640 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13641 return SDValue();
13642
13643 std::optional<unsigned> ByteOffset;
13644 Align Alignment = OriginalLoad->getAlign();
13646 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
// Constant index: the byte offset is known, so pointer info and alignment can
// be refined precisely.
// NOTE(review): getZExtValue() returns a 64-bit unsigned value that is
// narrowed to int here -- confirm indices are always small enough.
13647 int Elt = ConstEltNo->getZExtValue();
13648 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
13649 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
13650 Alignment = commonAlignment(Alignment, *ByteOffset);
13651 } else {
13652 // Discard the pointer info except the address space because the memory
13653 // operand can't represent this new access since the offset is variable.
13654 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
13655 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
13656 }
13657
// ByteOffset stays empty (nullopt) for a variable index.
13658 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
13659 return SDValue();
13660
// Require the narrower access to be both allowed and reported as fast.
13661 unsigned IsFast = 0;
13662 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
13663 OriginalLoad->getAddressSpace(), Alignment,
13664 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
13665 !IsFast)
13666 return SDValue();
13667
13668 // The original DAG loaded the entire vector from memory, so arithmetic
13669 // within it must be inbounds.
13671 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
13672
13673 // We are replacing a vector load with a scalar load. The new load must have
13674 // identical memory op ordering to the original.
13675 SDValue Load;
13676 if (ResultVT.bitsGT(VecEltVT)) {
13677 // If the result type of vextract is wider than the load, then issue an
13678 // extending load instead.
// NOTE(review): the true branch of this conditional is on a missing line;
// the visible isLoadLegal query is for ISD::ZEXTLOAD, with ISD::EXTLOAD as the
// fallback.
13679 ISD::LoadExtType ExtType =
13680 isLoadLegal(ResultVT, VecEltVT, Alignment,
13681 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
13683 : ISD::EXTLOAD;
13684 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
13685 NewPtr, MPI, VecEltVT, Alignment,
13686 OriginalLoad->getMemOperand()->getFlags(),
13687 OriginalLoad->getAAInfo());
13688 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13689 } else {
13690 // The result type is narrower or the same width as the vector element
13691 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
13692 Alignment, OriginalLoad->getMemOperand()->getFlags(),
13693 OriginalLoad->getAAInfo());
13694 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Narrower result: truncate. Same width: bitcast (a type change only).
13695 if (ResultVT.bitsLT(VecEltVT))
13696 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
13697 else
13698 Load = DAG.getBitcast(ResultVT, Load);
13699 }
13700
13701 return Load;
13702}
13703
13704// Set type id for call site info and metadata 'call_target'.
13705// We are filtering for:
13706// a) The call-graph-section use case that wants to know about indirect
13707// calls, or
13708// b) We want to annotate indirect calls.
// Only a non-null, indirect call site is considered; when the full condition
// holds, CSInfo is overwritten with a CallSiteInfo built from the call.
// NOTE(review): listing lines 13709 (start of the signature) and 13713-13714
// (the rest of the condition, presumably the two filters described above) are
// absent from this extract, and MF is not referenced on any visible line.
13710 const CallBase *CB, MachineFunction &MF,
13711 MachineFunction::CallSiteInfo &CSInfo) const {
13712 if (CB && CB->isIndirectCall() &&
13715 CSInfo = MachineFunction::CallSiteInfo(*CB);
13716}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:111
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:235
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:291
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:227
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:268
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
void changeSign()
Definition APFloat.h:1356
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:872
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0. This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Op is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison, and Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a true value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
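If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.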
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:311
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:716
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3061
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ CTTZ_ELTS
Returns the number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition evaluates to true when its two operands are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions should be required to lower it).
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:952
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:266
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:453
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition ValueTypes.h:501
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:210
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:467
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:553
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:557
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
fltNanEncoding nanEncoding
Definition APFloat.h:1015