LLVM 23.0.0git
RISCVISelLowering.cpp
Go to the documentation of this file.
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
// Statistic counter (reported via -stats): number of calls this backend
// lowered as tail calls.
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
// Command-line knob (riscv-lower-form-vw-w-with-splat): when true, the
// DAG combiner may form widening .W vector ops (e.g. VWADD_W) even when one
// operand is a splat constant. Off by default.
66static cl::opt<bool>
67    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68                     cl::desc("Allow the formation of VW_W operations (e.g., "
69                              "VWADD_W) with splat constants"),
70                     cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
// Command-line knob (reassoc-shl-addi-add): allows reassociating
// add/addi so that the add can later fold into a shift-add pattern.
// Enabled by default.
84static cl::opt<bool>
85    ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86                      cl::desc("Swap add and addi in cases where the add may "
87                               "be combined with a shift"),
88                      cl::init(true));
89
// VP (vector-predicated) opcodes listed here are the sign-manipulation float
// ops; presumably they are the ones handled natively for bf16 vectors when
// the Zvfbfa extension is available (the consumer of this table is outside
// this excerpt — confirm against SetZvfbfaActions).
90// TODO: Support more ops
91static const unsigned ZvfbfaVPOps[] = {
92    ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
99
101 const RISCVSubtarget &STI)
102 : TargetLowering(TM, STI), Subtarget(STI) {
103
104 RISCVABI::ABI ABI = Subtarget.getTargetABI();
105 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
106
107 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
108 !Subtarget.hasStdExtF()) {
109 errs() << "Hard-float 'f' ABI can't be used for a target that "
110 "doesn't support the F instruction set extension (ignoring "
111 "target-abi)\n";
112 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
113 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
114 !Subtarget.hasStdExtD()) {
115 errs() << "Hard-float 'd' ABI can't be used for a target that "
116 "doesn't support the D instruction set extension (ignoring "
117 "target-abi)\n";
118 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
119 }
120
121 switch (ABI) {
122 default:
123 reportFatalUsageError("Don't know how to lower this ABI");
132 break;
133 }
134
135 MVT XLenVT = Subtarget.getXLenVT();
136
137 // Set up the register classes.
138 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
139
140 if (Subtarget.hasStdExtZfhmin())
141 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
142 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
143 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
144 if (Subtarget.hasStdExtF())
145 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
146 if (Subtarget.hasStdExtD())
147 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
148 if (Subtarget.hasStdExtZhinxmin())
149 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
150 if (Subtarget.hasStdExtZfinx())
151 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
152 if (Subtarget.hasStdExtZdinx()) {
153 if (Subtarget.is64Bit())
154 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
155 else
156 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
157 }
158
159 static const MVT::SimpleValueType BoolVecVTs[] = {
160 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
161 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
162 static const MVT::SimpleValueType IntVecVTs[] = {
163 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
164 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
165 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
166 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
167 MVT::nxv4i64, MVT::nxv8i64};
168 static const MVT::SimpleValueType F16VecVTs[] = {
169 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
170 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
171 static const MVT::SimpleValueType BF16VecVTs[] = {
172 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
173 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
174 static const MVT::SimpleValueType F32VecVTs[] = {
175 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
176 static const MVT::SimpleValueType F64VecVTs[] = {
177 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
178 static const MVT::SimpleValueType VecTupleVTs[] = {
179 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
180 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
181 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
182 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
183 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
184 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
185 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
186 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
187 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
188 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
189 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
190
191 if (Subtarget.hasVInstructions()) {
192 auto addRegClassForRVV = [this](MVT VT) {
193 // Disable the smallest fractional LMUL types if ELEN is less than
194 // RVVBitsPerBlock.
195 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
196 if (VT.getVectorMinNumElements() < MinElts)
197 return;
198
199 unsigned Size = VT.getSizeInBits().getKnownMinValue();
200 const TargetRegisterClass *RC;
202 RC = &RISCV::VRRegClass;
203 else if (Size == 2 * RISCV::RVVBitsPerBlock)
204 RC = &RISCV::VRM2RegClass;
205 else if (Size == 4 * RISCV::RVVBitsPerBlock)
206 RC = &RISCV::VRM4RegClass;
207 else if (Size == 8 * RISCV::RVVBitsPerBlock)
208 RC = &RISCV::VRM8RegClass;
209 else
210 llvm_unreachable("Unexpected size");
211
212 addRegisterClass(VT, RC);
213 };
214
215 for (MVT VT : BoolVecVTs)
216 addRegClassForRVV(VT);
217 for (MVT VT : IntVecVTs) {
218 if (VT.getVectorElementType() == MVT::i64 &&
219 !Subtarget.hasVInstructionsI64())
220 continue;
221 addRegClassForRVV(VT);
222 }
223
224 if (Subtarget.hasVInstructionsF16Minimal() ||
225 Subtarget.hasVendorXAndesVPackFPH())
226 for (MVT VT : F16VecVTs)
227 addRegClassForRVV(VT);
228
229 if (Subtarget.hasVInstructionsBF16Minimal() ||
230 Subtarget.hasVendorXAndesVBFHCvt())
231 for (MVT VT : BF16VecVTs)
232 addRegClassForRVV(VT);
233
234 if (Subtarget.hasVInstructionsF32())
235 for (MVT VT : F32VecVTs)
236 addRegClassForRVV(VT);
237
238 if (Subtarget.hasVInstructionsF64())
239 for (MVT VT : F64VecVTs)
240 addRegClassForRVV(VT);
241
242 if (Subtarget.useRVVForFixedLengthVectors()) {
243 auto addRegClassForFixedVectors = [this](MVT VT) {
244 MVT ContainerVT = getContainerForFixedLengthVector(VT);
245 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
246 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
247 addRegisterClass(VT, TRI.getRegClass(RCID));
248 };
250 if (useRVVForFixedLengthVectorVT(VT))
251 addRegClassForFixedVectors(VT);
252
254 if (useRVVForFixedLengthVectorVT(VT))
255 addRegClassForFixedVectors(VT);
256 }
257
258 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
266 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
267 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
268 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
269 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
270 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
271 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
272 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
273 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
274 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
275 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
276 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
277 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
278 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
279 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
280 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
281 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
282 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
283 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
284 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
285 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
286 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
287 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
288 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
289 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
290 }
291
292 // fixed vector is stored in GPRs for P extension packed operations
293 if (Subtarget.hasStdExtP()) {
294 if (Subtarget.is64Bit()) {
295 addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
296 addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
297 addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
298 } else {
299 addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
300 addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
301 }
302 }
303
304 // Compute derived properties from the register classes.
306
308
310 MVT::i1, Promote);
311 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
313 MVT::i1, Promote);
314
315 // TODO: add all necessary setOperationAction calls.
317
322
327 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
330 }
331
333
336
337 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
338 !Subtarget.hasVendorXAndesPerf())
340
342
343 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
344 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
345 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
346 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
347
348 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
351 }
352
353 if (Subtarget.is64Bit()) {
355
358 MVT::i32, Custom);
361 } else if (Subtarget.hasStdExtP()) {
362 // Custom legalize i64 ADD/SUB/SHL/SRL/SRA for RV32+P.
365 }
366 if (!Subtarget.hasStdExtZmmul()) {
368 } else if (Subtarget.is64Bit()) {
371 } else {
373 }
374
375 if (!Subtarget.hasStdExtM()) {
377 Expand);
378 } else if (Subtarget.is64Bit()) {
380 {MVT::i8, MVT::i16, MVT::i32}, Custom);
381 }
382
384
385 // On RV32, the P extension has a WMUL(U) instruction we can use for
386 // (S/U)MUL_LOHI.
387 // FIXME: Does P imply Zmmul?
388 if (!Subtarget.hasStdExtP() || !Subtarget.hasStdExtZmmul() ||
389 Subtarget.is64Bit())
391
393 Custom);
394
395 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
396 if (Subtarget.is64Bit())
398 } else if (Subtarget.hasVendorXTHeadBb()) {
399 if (Subtarget.is64Bit())
402 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
404 } else {
406 }
407
408 if (Subtarget.hasStdExtP())
410
412 Subtarget.hasREV8Like() ? Legal : Expand);
413
414 if (Subtarget.hasREVLike()) {
416 } else {
417 // Zbkb can use rev8+brev8 to implement bitreverse.
419 Subtarget.hasStdExtZbkb() ? Custom : Expand);
420 if (Subtarget.hasStdExtZbkb())
422 }
423
424 if (Subtarget.hasStdExtZbb() ||
425 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
427 Legal);
428 }
429
430 if (Subtarget.hasCTZLike()) {
431 if (Subtarget.is64Bit())
433 } else {
435 }
436
437 if (!Subtarget.hasCPOPLike()) {
438 // TODO: These should be set to LibCall, but this currently breaks
439 // the Linux kernel build. See #101786. Lacks i128 tests, too.
440 if (Subtarget.is64Bit())
442 else
445 }
446
447 if (Subtarget.hasCLZLike()) {
448 // We need the custom lowering to make sure that the resulting sequence
449 // for the 32bit case is efficient on 64bit targets.
450 // Use default promotion for i32 without Zbb.
451 if (Subtarget.is64Bit() &&
452 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
454 } else {
456 }
457
458 if (Subtarget.hasStdExtP()) {
460 if (Subtarget.is64Bit())
462 }
463
464 if (Subtarget.hasStdExtP() ||
465 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
467 if (Subtarget.is64Bit())
469 } else if (Subtarget.hasShortForwardBranchIALU()) {
470 // We can use PseudoCCSUB to implement ABS.
472 } else if (Subtarget.is64Bit()) {
474 }
475
476 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
478
479 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
480 !Subtarget.is64Bit()) {
482 MVT::i32, Legal);
483 } else if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) {
485 MVT::i32, Custom);
486 } else if (!Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) {
488 MVT::i32, Custom);
489 }
490
491 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
493 }
494
495 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
496 !Subtarget.is64Bit()) {
497 // FIXME: Support i32 on RV64+P by inserting into a v2i32 vector, doing
498 // pssha.w and extracting.
500 }
501
502 if (Subtarget.hasStdExtZbc() || Subtarget.hasStdExtZbkc())
504 if (Subtarget.hasStdExtZbc())
506
507 static const unsigned FPLegalNodeTypes[] = {
515
516 static const ISD::CondCode FPCCToExpand[] = {
520
521 static const unsigned FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
522 ISD::FPOW};
523 static const unsigned FPOpToLibCall[] = {ISD::FREM};
524
525 static const unsigned FPRndMode[] = {
528
529 static const unsigned ZfhminZfbfminPromoteOps[] = {
539
540 if (Subtarget.hasStdExtP()) {
542 static const MVT RV32VTs[] = {MVT::v2i16, MVT::v4i8};
543 static const MVT RV64VTs[] = {MVT::v2i32, MVT::v4i16, MVT::v8i8};
544 ArrayRef<MVT> VTs;
545 if (Subtarget.is64Bit()) {
546 VTs = RV64VTs;
547 // There's no instruction for vector shamt in P extension so we unroll to
548 // scalar instructions. Vector VTs that are 32-bit are widened to 64-bit
549 // vector, e.g. v2i16 -> v4i16, before getting unrolled, so we need custom
550 // widen for those operations that will be unrolled.
552 {MVT::v2i16, MVT::v4i8}, Custom);
553 } else {
554 VTs = RV32VTs;
555 }
556 // By default everything must be expanded.
557 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
559
560 for (MVT VT : VTs) {
562 setTruncStoreAction(VT, OtherVT, Expand);
564 OtherVT, Expand);
565 }
566 }
567
577 for (MVT VT : VTs) {
578 if (VT != MVT::v2i32)
580 if (VT.getVectorElementType() != MVT::i8)
582 }
589 Custom);
591 Legal);
597 VTs, Expand);
598
599 if (!Subtarget.is64Bit())
601
602 // P extension vector comparisons produce all 1s for true, all 0s for false
604 }
605
606 if (Subtarget.hasStdExtZfbfmin()) {
612 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
619 }
620
621 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
622 if (Subtarget.hasStdExtZfhOrZhinx()) {
623 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
624 setOperationAction(FPRndMode, MVT::f16,
625 Subtarget.hasStdExtZfa() ? Legal : Custom);
628 Subtarget.hasStdExtZfa() ? Legal : Custom);
629 if (Subtarget.hasStdExtZfa())
631 } else {
632 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
637 setOperationAction(Op, MVT::f16, Custom);
643 }
644
645 if (!Subtarget.hasStdExtD()) {
646 // FIXME: handle f16 fma when f64 is not legal. Using an f32 fma
647 // instruction runs into double rounding issues, so this is wrong.
648 // Normally we'd use an f64 fma, but without the D extension the f64 type
649 // is not legal. This should probably be a libcall.
650 AddPromotedToType(ISD::FMA, MVT::f16, MVT::f32);
651 AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f32);
652 }
653
655
658 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
662
664 ISD::FNEARBYINT, MVT::f16,
665 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
670 MVT::f16, Promote);
671
672 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
673 // complete support for all operations in LegalizeDAG.
678 MVT::f16, Promote);
679
680 // We need to custom promote this.
681 if (Subtarget.is64Bit())
683 }
684
685 if (Subtarget.hasStdExtFOrZfinx()) {
686 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
687 setOperationAction(FPRndMode, MVT::f32,
688 Subtarget.hasStdExtZfa() ? Legal : Custom);
689 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
693 setOperationAction(FPOpToExpand, MVT::f32, Expand);
694 setOperationAction(FPOpToLibCall, MVT::f32, LibCall);
695 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
696 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
697 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
698 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
702 Subtarget.isSoftFPABI() ? LibCall : Custom);
707
708 if (Subtarget.hasStdExtZfa()) {
712 } else {
714 }
715 }
716
717 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
719
720 if (Subtarget.hasStdExtDOrZdinx()) {
721 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
722
723 if (!Subtarget.is64Bit())
725
726 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
727 !Subtarget.is64Bit()) {
730 }
731
732 if (Subtarget.hasStdExtZfa()) {
734 setOperationAction(FPRndMode, MVT::f64, Legal);
737 } else {
738 if (Subtarget.is64Bit())
739 setOperationAction(FPRndMode, MVT::f64, Custom);
740
742 }
743
746 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
750 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
751 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
752 setOperationAction(FPOpToExpand, MVT::f64, Expand);
753 setOperationAction(FPOpToLibCall, MVT::f64, LibCall);
754 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
755 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
756 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
757 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
761 Subtarget.isSoftFPABI() ? LibCall : Custom);
766 }
767
768 if (Subtarget.is64Bit()) {
771 MVT::i32, Custom);
773 }
774
775 if (Subtarget.hasStdExtFOrZfinx()) {
777 Custom);
778
779 // f16/bf16 require custom handling.
781 Custom);
783 Custom);
784
793 }
794
797 XLenVT, Custom);
798
800
801 if (Subtarget.is64Bit())
803
804 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
805 // Unfortunately this can't be determined just from the ISA naming string.
807 Subtarget.is64Bit() ? Legal : Custom);
809 Subtarget.is64Bit() ? Legal : Custom);
810
811 if (Subtarget.is64Bit()) {
814 }
815
818 if (Subtarget.is64Bit())
820
821 if (Subtarget.hasVendorXMIPSCBOP())
823 else if (Subtarget.hasStdExtZicbop())
825
826 if (Subtarget.hasStdExtZalrsc()) {
827 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
828 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
830 else
832 } else if (Subtarget.hasForcedAtomics()) {
833 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
834 } else {
836 }
837
839
841
842 if (getTargetMachine().getTargetTriple().isOSLinux()) {
843 // Custom lowering of llvm.clear_cache.
845 }
846
847 if (Subtarget.hasVInstructions()) {
849
851
852 // RVV intrinsics may have illegal operands.
853 // We also need to custom legalize vmv.x.s.
856 {MVT::i8, MVT::i16}, Custom);
857 if (Subtarget.is64Bit())
859 MVT::i32, Custom);
860 else
862 MVT::i64, Custom);
863
865 MVT::Other, Custom);
866
867 static const unsigned IntegerVPOps[] = {
868 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
869 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
870 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
871 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
872 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
873 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
874 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
875 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
876 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
877 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
878 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
879 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
880 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
881 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
882
883 static const unsigned FloatingPointVPOps[] = {
884 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
885 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
886 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
887 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
888 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
889 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
890 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
891 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
892 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
893 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
894 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
895 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
896 ISD::VP_REDUCE_FMAXIMUM};
897
898 static const unsigned IntegerVecReduceOps[] = {
902
903 static const unsigned FloatingPointVecReduceOps[] = {
906
907 static const unsigned FloatingPointLibCallOps[] = {
910
911 if (!Subtarget.is64Bit()) {
912 // We must custom-lower certain vXi64 operations on RV32 due to the vector
913 // element type being illegal.
915 MVT::i64, Custom);
916
917 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
918
919 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
920 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
921 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
922 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
923 MVT::i64, Custom);
924 }
925
926 for (MVT VT : BoolVecVTs) {
927 if (!isTypeLegal(VT))
928 continue;
929
931
932 // Mask VTs are custom-expanded into a series of standard nodes
936 VT, Custom);
937
939 Custom);
940
942 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
943 Expand);
944 setOperationAction(ISD::VP_MERGE, VT, Custom);
945
946 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
947 Custom);
948
949 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
950
953 Custom);
954
956 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
957 Custom);
958
959 // RVV has native int->float & float->int conversions where the
960 // element type sizes are within one power-of-two of each other. Any
961 // wider distances between type sizes have to be lowered as sequences
962 // which progressively narrow the gap in stages.
967 VT, Custom);
969 Custom);
970
971 // Expand all extending loads to types larger than this, and truncating
972 // stores from types larger than this.
974 setTruncStoreAction(VT, OtherVT, Expand);
976 OtherVT, Expand);
977 }
978
979 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
980 ISD::VP_TRUNCATE, ISD::VP_SETCC},
981 VT, Custom);
982
985
987
988 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
989 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
990
993 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
994 }
995
996 for (MVT VT : IntVecVTs) {
997 if (!isTypeLegal(VT))
998 continue;
999
1002
1003 // Vectors implement MULHS/MULHU.
1005
1006 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1007 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
1009
1011 Legal);
1012
1013 if (Subtarget.hasStdExtZvabd()) {
1015 // Only SEW=8/16 are supported in Zvabd.
1016 if (VT.getVectorElementType() == MVT::i8 ||
1017 VT.getVectorElementType() == MVT::i16)
1019 else
1021 } else
1023
1024 // Custom-lower extensions and truncations from/to mask types.
1026 VT, Custom);
1027
1028 // RVV has native int->float & float->int conversions where the
1029 // element type sizes are within one power-of-two of each other. Any
1030 // wider distances between type sizes have to be lowered as sequences
1031 // which progressively narrow the gap in stages.
1036 VT, Custom);
1038 Custom);
1042 VT, Legal);
1043
1044 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
1045 // nodes which truncate by one power of two at a time.
1048 Custom);
1049
1050 // Custom-lower insert/extract operations to simplify patterns.
1052 Custom);
1053
1054 // Custom-lower reduction operations to set up the corresponding custom
1055 // nodes' operands.
1056 setOperationAction(IntegerVecReduceOps, VT, Custom);
1057
1058 setOperationAction(IntegerVPOps, VT, Custom);
1059
1061
1063 VT, Custom);
1064
1066 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1067 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1068 VT, Custom);
1069 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1070
1073 VT, Custom);
1074
1077
1079
1081 setTruncStoreAction(VT, OtherVT, Expand);
1083 OtherVT, Expand);
1084 }
1085
1088
1090 VT, Custom);
1091
1092 if (Subtarget.hasStdExtZvkb()) {
1094 setOperationAction(ISD::VP_BSWAP, VT, Custom);
1095 } else {
1096 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
1098 }
1099
1100 if (Subtarget.hasStdExtZvbb()) {
1102 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
1103 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1104 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1105 VT, Custom);
1106 } else {
1107 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
1109 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1110 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1111 VT, Expand);
1112
1113 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1114 // range of f32.
1115 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1116 if (isTypeLegal(FloatVT)) {
1118 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
1119 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
1120 VT, Custom);
1121 }
1122 }
1123
1124 if (VT.getVectorElementType() == MVT::i64) {
1125 if (Subtarget.hasStdExtZvbc())
1127 } else {
1128 if (Subtarget.hasStdExtZvbc32e()) {
1130 } else if (Subtarget.hasStdExtZvbc()) {
1131 // Promote to i64 if the lmul is small enough.
1132 // FIXME: Split if necessary to widen.
1133 // FIXME: Promote clmulh directly without legalizing to clmul first.
1134 MVT I64VecVT = MVT::getVectorVT(MVT::i64, VT.getVectorElementCount());
1135 if (isTypeLegal(I64VecVT))
1137 }
1138 }
1139
1141 }
1142
1143 for (MVT VT : VecTupleVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146
1148 }
1149
1150 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1151 // but no other unordered comparisons, and supports all ordered comparisons
1152 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1153 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1154 // and we pattern-match those back to the "original", swapping operands once
1155 // more. This way we catch both operations and both "vf" and "fv" forms with
1156 // fewer patterns.
1157 static const ISD::CondCode VFPCCToExpand[] = {
1161 };
1162
1163 // TODO: support more ops.
1164 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1169 ISD::FADD,
1170 ISD::FSUB,
1171 ISD::FMUL,
1172 ISD::FMA,
1173 ISD::FDIV,
1174 ISD::FSQRT,
1175 ISD::FCEIL,
1180 ISD::FRINT,
1183 ISD::SETCC,
1196
1197 // TODO: Make more of these ops legal.
1198 static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
1199 ISD::FSQRT,
1200 ISD::FCEIL,
1205 ISD::FRINT,
1213
1214 // TODO: support more vp ops.
1215 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1216 ISD::VP_FADD,
1217 ISD::VP_FSUB,
1218 ISD::VP_FMUL,
1219 ISD::VP_FDIV,
1220 ISD::VP_FMA,
1221 ISD::VP_REDUCE_FMIN,
1222 ISD::VP_REDUCE_FMAX,
1223 ISD::VP_SQRT,
1224 ISD::VP_FMINNUM,
1225 ISD::VP_FMAXNUM,
1226 ISD::VP_FCEIL,
1227 ISD::VP_FFLOOR,
1228 ISD::VP_FROUND,
1229 ISD::VP_FROUNDEVEN,
1230 ISD::VP_FROUNDTOZERO,
1231 ISD::VP_FRINT,
1232 ISD::VP_FNEARBYINT,
1233 ISD::VP_SETCC,
1234 ISD::VP_FMINIMUM,
1235 ISD::VP_FMAXIMUM,
1236 ISD::VP_REDUCE_FMINIMUM,
1237 ISD::VP_REDUCE_FMAXIMUM};
1238
1239 // Sets common operation actions on RVV floating-point vector types.
1240 const auto SetCommonVFPActions = [&](MVT VT) {
1242 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1243 // sizes are within one power-of-two of each other. Therefore conversions
1244 // between vXf16 and vXf64 must be lowered as sequences which convert via
1245 // vXf32.
1249 // Custom-lower insert/extract operations to simplify patterns.
1251 Custom);
1252 // Expand various condition codes (explained above).
1253 setCondCodeAction(VFPCCToExpand, VT, Expand);
1254
1257 Legal);
1259
1263 VT, Custom);
1264
1265 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1266
1267 // Expand FP operations that need libcalls.
1268 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1269
1271
1273
1275 VT, Custom);
1276
1278 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1279 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1280 VT, Custom);
1281 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1282
1285
1288 VT, Custom);
1289
1292
1295 VT, Custom);
1296 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1297 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1298
1299 setOperationAction(FloatingPointVPOps, VT, Custom);
1300
1302 Custom);
1305 VT, Legal);
1310 VT, Custom);
1311
1313 };
1314
1315 // Sets common extload/truncstore actions on RVV floating-point vector
1316 // types.
1317 const auto SetCommonVFPExtLoadTruncStoreActions =
1318 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1319 for (auto SmallVT : SmallerVTs) {
1320 setTruncStoreAction(VT, SmallVT, Expand);
1321 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1322 }
1323 };
1324
1325 // Sets common actions for f16 and bf16 for when there's only
1326 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1327 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1330 Custom);
1331 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1334 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1335 Custom);
1337 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1343 VT, Custom);
1344 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1345 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1346 MVT EltVT = VT.getVectorElementType();
1347 if (isTypeLegal(EltVT))
1349 VT, Custom);
1350 else
1353 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1354 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1355 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1356 ISD::VP_SCATTER},
1357 VT, Custom);
1358 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1359
1363
1364 // Expand FP operations that need libcalls.
1365 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1366
1367 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1368 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1369 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1370 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1371 } else {
1372 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1373 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1374 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1375 }
1376 };
1377
1378 // Sets common actions for zvfbfa, some of instructions are supported
1379 // natively so that we don't need to promote them.
1380 const auto SetZvfbfaActions = [&](MVT VT) {
1383 Custom);
1384 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1387 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1388 Custom);
1390 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1397 VT, Custom);
1400 Legal);
1403 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1404 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1405
1410 VT, Legal);
1412 setCondCodeAction(VFPCCToExpand, VT, Expand);
1413
1415 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1416 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1417 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1418 ISD::VP_SCATTER},
1419 VT, Custom);
1420 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1421
1422 // Expand FP operations that need libcalls.
1423 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1424
1425 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1426 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1427 setOperationAction(ZvfbfaPromoteOps, VT, Custom);
1428 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1429 } else {
1430 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1431 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1432 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1433 }
1434 };
1435
1436 if (Subtarget.hasVInstructionsF16()) {
1437 for (MVT VT : F16VecVTs) {
1438 if (!isTypeLegal(VT))
1439 continue;
1440 SetCommonVFPActions(VT);
1441 }
1442 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1443 for (MVT VT : F16VecVTs) {
1444 if (!isTypeLegal(VT))
1445 continue;
1446 SetCommonPromoteToF32Actions(VT);
1447 }
1448 }
1449
1450 if (Subtarget.hasVInstructionsBF16()) {
1451 for (MVT VT : BF16VecVTs) {
1452 if (!isTypeLegal(VT))
1453 continue;
1454 SetZvfbfaActions(VT);
1455 }
1456 } else if (Subtarget.hasVInstructionsBF16Minimal()) {
1457 for (MVT VT : BF16VecVTs) {
1458 if (!isTypeLegal(VT))
1459 continue;
1460 SetCommonPromoteToF32Actions(VT);
1461 }
1462 }
1463
1464 if (Subtarget.hasVInstructionsF32()) {
1465 for (MVT VT : F32VecVTs) {
1466 if (!isTypeLegal(VT))
1467 continue;
1468 SetCommonVFPActions(VT);
1469 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1470 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1471 }
1472 }
1473
1474 if (Subtarget.hasVInstructionsF64()) {
1475 for (MVT VT : F64VecVTs) {
1476 if (!isTypeLegal(VT))
1477 continue;
1478 SetCommonVFPActions(VT);
1479 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1480 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1481 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1482 }
1483 }
1484
1485 if (Subtarget.useRVVForFixedLengthVectors()) {
1487 if (!useRVVForFixedLengthVectorVT(VT))
1488 continue;
1489
1490 // By default everything must be expanded.
1491 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1494 setTruncStoreAction(VT, OtherVT, Expand);
1496 OtherVT, Expand);
1497 }
1498
1499 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1500 // expansion to a build_vector of 0s.
1502
1503 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1505 Custom);
1506
1509 Custom);
1510
1512 VT, Custom);
1513
1515 VT, Custom);
1516
1518
1520
1522
1524
1527 Custom);
1528
1530
1533 Custom);
1534
1536 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1537 Custom);
1538
1540 {
1549 },
1550 VT, Custom);
1552 Custom);
1553
1555
1556 // Operations below are different for between masks and other vectors.
1557 if (VT.getVectorElementType() == MVT::i1) {
1558 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1559 ISD::OR, ISD::XOR},
1560 VT, Custom);
1561
1562 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1563 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1564 VT, Custom);
1565
1566 setOperationAction(ISD::VP_MERGE, VT, Custom);
1567
1568 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1569 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1570 continue;
1571 }
1572
1573 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1574 // it before type legalization for i64 vectors on RV32. It will then be
1575 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1576 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1577 // improvements first.
1578 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1581
1582 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1584 }
1585
1588
1589 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1590 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1591 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1592 ISD::VP_SCATTER},
1593 VT, Custom);
1594 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1595
1599 VT, Custom);
1600
1603
1605
1606 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1607 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1609
1613 VT, Custom);
1614
1616
1619
1620 // Custom-lower reduction operations to set up the corresponding custom
1621 // nodes' operands.
1625 VT, Custom);
1626
1627 setOperationAction(IntegerVPOps, VT, Custom);
1628
1629 if (Subtarget.hasStdExtZvkb())
1631
1632 if (Subtarget.hasStdExtZvbb()) {
1635 VT, Custom);
1636 } else {
1637 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1638 // range of f32.
1639 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1640 if (isTypeLegal(FloatVT))
1643 Custom);
1644 }
1645
1647 }
1648
1650 // There are no extending loads or truncating stores.
1651 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1652 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1653 setTruncStoreAction(VT, InnerVT, Expand);
1654 }
1655
1656 if (!useRVVForFixedLengthVectorVT(VT))
1657 continue;
1658
1659 // By default everything must be expanded.
1660 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1662
1663 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1664 // expansion to a build_vector of 0s.
1666
1671 VT, Custom);
1672 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1673 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1674
1676 VT, Custom);
1677
1680 VT, Custom);
1681 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1682 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1683 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1684 VT, Custom);
1685 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1686
1689 Custom);
1690
1692
1693 if (VT.getVectorElementType() == MVT::f16 &&
1694 !Subtarget.hasVInstructionsF16()) {
1695 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1697 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1698 Custom);
1699 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1700 Custom);
1703 if (Subtarget.hasStdExtZfhmin()) {
1705 } else {
1706 // We need to custom legalize f16 build vectors if Zfhmin isn't
1707 // available.
1709 }
1713 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1714 // Don't promote f16 vector operations to f32 if f32 vector type is
1715 // not legal.
1716 // TODO: could split the f16 vector into two vectors and do promotion.
1717 if (!isTypeLegal(F32VecVT))
1718 continue;
1719 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1720 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1721 continue;
1722 }
1723
1724 if (VT.getVectorElementType() == MVT::bf16) {
1725 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1728 if (Subtarget.hasStdExtZfbfmin()) {
1730 } else {
1731 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1732 // available.
1734 }
1735 if (Subtarget.hasStdExtZvfbfa()) {
1738 setCondCodeAction(VFPCCToExpand, VT, Expand);
1739 }
1741 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1742 Custom);
1743 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1744 // Don't promote f16 vector operations to f32 if f32 vector type is
1745 // not legal.
1746 // TODO: could split the f16 vector into two vectors and do promotion.
1747 if (!isTypeLegal(F32VecVT))
1748 continue;
1749
1750 if (Subtarget.hasStdExtZvfbfa())
1751 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1752 else
1753 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1754 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1755 continue;
1756 }
1757
1759 Custom);
1760
1766 VT, Custom);
1767
1772 VT, Custom);
1773
1774 setCondCodeAction(VFPCCToExpand, VT, Expand);
1775
1778
1779 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1780
1781 setOperationAction(FloatingPointVPOps, VT, Custom);
1782
1789 VT, Custom);
1790 }
1791
1792 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1793 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1794 if (Subtarget.is64Bit())
1796 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1798 if (Subtarget.hasStdExtZfbfmin())
1800 if (Subtarget.hasStdExtFOrZfinx())
1802 if (Subtarget.hasStdExtDOrZdinx())
1804 }
1805 }
1806
1807 if (Subtarget.hasStdExtZaamo())
1809
1810 if (Subtarget.hasForcedAtomics()) {
1811 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1817 XLenVT, LibCall);
1818 }
1819
1820 if (Subtarget.hasVendorXTHeadMemIdx()) {
1821 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1822 setIndexedLoadAction(im, MVT::i8, Legal);
1823 setIndexedStoreAction(im, MVT::i8, Legal);
1824 setIndexedLoadAction(im, MVT::i16, Legal);
1825 setIndexedStoreAction(im, MVT::i16, Legal);
1826 setIndexedLoadAction(im, MVT::i32, Legal);
1827 setIndexedStoreAction(im, MVT::i32, Legal);
1828
1829 if (Subtarget.is64Bit()) {
1830 setIndexedLoadAction(im, MVT::i64, Legal);
1831 setIndexedStoreAction(im, MVT::i64, Legal);
1832 }
1833 }
1834 }
1835
1836 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1840
1844 }
1845
1846 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1847 if (Subtarget.hasStdExtZvdot4a8i() && Subtarget.getELen() >= 64) {
1848 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1851 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1852 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1853 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1854 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1855 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1856
1857 if (Subtarget.useRVVForFixedLengthVectors()) {
1859 if (VT.getVectorElementType() != MVT::i32 ||
1860 !useRVVForFixedLengthVectorVT(VT))
1861 continue;
1862 ElementCount EC = VT.getVectorElementCount();
1863 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1864 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1865 }
1866 }
1867 }
1868
1869 // Customize load and store operation for bf16 if zfh isn't enabled.
1870 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1871 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1873 }
1874
1875 // Function alignments.
1876 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1877 setMinFunctionAlignment(FunctionAlignment);
1878 // Set preferred alignments.
1879 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1880 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1881
1887
1888 if (Subtarget.hasStdExtFOrZfinx())
1890
1891 if (Subtarget.hasStdExtZbb())
1893
1894 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1895 Subtarget.hasVInstructions())
1897
1898 if (Subtarget.hasStdExtZbkb())
1900
1901 if (Subtarget.hasStdExtFOrZfinx())
1904 if (Subtarget.hasVInstructions())
1907 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1910 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1915
1916 if (Subtarget.hasVendorXTHeadMemPair())
1918 if (Subtarget.useRVVForFixedLengthVectors())
1920
1921 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1922
1923 // Disable strict node mutation.
1924 IsStrictFPEnabled = true;
1925 EnableExtLdPromotion = true;
1926
1927 // Let the subtarget decide if a predictable select is more expensive than the
1928 // corresponding branch. This information is used in CGP/SelectOpt to decide
1929 // when to convert selects into branches.
1930 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1931
1932 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1933 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1934
1935 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1936 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1937 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1938
1940 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1941 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1942
1943 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1944 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1945}
1946
1949 if (Subtarget.is64Bit() && Subtarget.hasStdExtP())
1950 if (VT == MVT::v2i16 || VT == MVT::v4i8)
1951 return TypeWidenVector;
1952
1954}
1955
1957 LLVMContext &Context,
1958 EVT VT) const {
1959 if (!VT.isVector())
1960 return getPointerTy(DL);
1961 if (Subtarget.hasVInstructions() &&
1962 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1963 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1965}
1966
1968 return Subtarget.getXLenVT();
1969}
1970
1971// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1972bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1973 unsigned VF,
1974 bool IsScalable) const {
1975 if (!Subtarget.hasVInstructions())
1976 return true;
1977
1978 if (!IsScalable)
1979 return true;
1980
1981 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1982 return true;
1983
1984 // Don't allow VF=1 if those types are't legal.
1985 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1986 return true;
1987
1988 // VLEN=32 support is incomplete.
1989 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1990 return true;
1991
1992 // The maximum VF is for the smallest element width with LMUL=8.
1993 // VF must be a power of 2.
1994 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1995 return VF > MaxVF || !isPowerOf2_32(VF);
1996}
1997
1999 return !Subtarget.hasVInstructions() ||
2000 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
2001}
2002
2005 MachineFunction &MF, unsigned Intrinsic) const {
2006 IntrinsicInfo Info;
2007 auto &DL = I.getDataLayout();
2008
2009 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
2010 bool IsUnitStrided, bool UsePtrVal = false) {
2011 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
2012 // We can't use ptrVal if the intrinsic can access memory before the
2013 // pointer. This means we can't use it for strided or indexed intrinsics.
2014 if (UsePtrVal)
2015 Info.ptrVal = I.getArgOperand(PtrOp);
2016 else
2017 Info.fallbackAddressSpace =
2018 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
2019 Type *MemTy;
2020 if (IsStore) {
2021 // Store value is the first operand.
2022 MemTy = I.getArgOperand(0)->getType();
2023 } else {
2024 // Use return type. If it's segment load, return type is a struct.
2025 MemTy = I.getType();
2026 if (MemTy->isStructTy())
2027 MemTy = MemTy->getStructElementType(0);
2028 }
2029 if (!IsUnitStrided)
2030 MemTy = MemTy->getScalarType();
2031
2032 Info.memVT = getValueType(DL, MemTy);
2033 if (MemTy->isTargetExtTy()) {
2034 // RISC-V vector tuple type's alignment type should be its element type.
2035 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
2036 MemTy = Type::getIntNTy(
2037 MemTy->getContext(),
2038 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
2039 ->getZExtValue());
2040 Info.align = DL.getABITypeAlign(MemTy);
2041 } else {
2042 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
2043 }
2044 Info.size = MemoryLocation::UnknownSize;
2045 Info.flags |=
2047 Infos.push_back(Info);
2048 };
2049
2050 if (I.hasMetadata(LLVMContext::MD_nontemporal))
2052
2054 switch (Intrinsic) {
2055 default:
2056 return;
2057 case Intrinsic::riscv_masked_atomicrmw_xchg:
2058 case Intrinsic::riscv_masked_atomicrmw_add:
2059 case Intrinsic::riscv_masked_atomicrmw_sub:
2060 case Intrinsic::riscv_masked_atomicrmw_nand:
2061 case Intrinsic::riscv_masked_atomicrmw_max:
2062 case Intrinsic::riscv_masked_atomicrmw_min:
2063 case Intrinsic::riscv_masked_atomicrmw_umax:
2064 case Intrinsic::riscv_masked_atomicrmw_umin:
2065 case Intrinsic::riscv_masked_cmpxchg:
2066 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
2067 // narrow atomic operation. These will be expanded to an LR/SC loop that
2068 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
2069 // will be used to modify the appropriate part of the 4 byte data and
2070 // preserve the rest.
2071 Info.opc = ISD::INTRINSIC_W_CHAIN;
2072 Info.memVT = MVT::i32;
2073 Info.ptrVal = I.getArgOperand(0);
2074 Info.offset = 0;
2075 Info.align = Align(4);
2078 Infos.push_back(Info);
2079 return;
2080 case Intrinsic::riscv_seg2_load_mask:
2081 case Intrinsic::riscv_seg3_load_mask:
2082 case Intrinsic::riscv_seg4_load_mask:
2083 case Intrinsic::riscv_seg5_load_mask:
2084 case Intrinsic::riscv_seg6_load_mask:
2085 case Intrinsic::riscv_seg7_load_mask:
2086 case Intrinsic::riscv_seg8_load_mask:
2087 case Intrinsic::riscv_sseg2_load_mask:
2088 case Intrinsic::riscv_sseg3_load_mask:
2089 case Intrinsic::riscv_sseg4_load_mask:
2090 case Intrinsic::riscv_sseg5_load_mask:
2091 case Intrinsic::riscv_sseg6_load_mask:
2092 case Intrinsic::riscv_sseg7_load_mask:
2093 case Intrinsic::riscv_sseg8_load_mask:
2094 SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
2095 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2096 return;
2097 case Intrinsic::riscv_seg2_store_mask:
2098 case Intrinsic::riscv_seg3_store_mask:
2099 case Intrinsic::riscv_seg4_store_mask:
2100 case Intrinsic::riscv_seg5_store_mask:
2101 case Intrinsic::riscv_seg6_store_mask:
2102 case Intrinsic::riscv_seg7_store_mask:
2103 case Intrinsic::riscv_seg8_store_mask:
2104 // Operands are (vec, ..., vec, ptr, mask, vl)
2105 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2106 /*IsStore*/ true,
2107 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2108 return;
2109 case Intrinsic::riscv_sseg2_store_mask:
2110 case Intrinsic::riscv_sseg3_store_mask:
2111 case Intrinsic::riscv_sseg4_store_mask:
2112 case Intrinsic::riscv_sseg5_store_mask:
2113 case Intrinsic::riscv_sseg6_store_mask:
2114 case Intrinsic::riscv_sseg7_store_mask:
2115 case Intrinsic::riscv_sseg8_store_mask:
2116 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
2117 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2118 /*IsStore*/ true,
2119 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2120 return;
2121 case Intrinsic::riscv_vlm:
2122 SetRVVLoadStoreInfo(/*PtrOp*/ 0,
2123 /*IsStore*/ false,
2124 /*IsUnitStrided*/ true,
2125 /*UsePtrVal*/ true);
2126 return;
2127 case Intrinsic::riscv_vle:
2128 case Intrinsic::riscv_vle_mask:
2129 case Intrinsic::riscv_vleff:
2130 case Intrinsic::riscv_vleff_mask:
2131 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2132 /*IsStore*/ false,
2133 /*IsUnitStrided*/ true,
2134 /*UsePtrVal*/ true);
2135 return;
2136 case Intrinsic::riscv_vsm:
2137 case Intrinsic::riscv_vse:
2138 case Intrinsic::riscv_vse_mask:
2139 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2140 /*IsStore*/ true,
2141 /*IsUnitStrided*/ true,
2142 /*UsePtrVal*/ true);
2143 return;
2144 case Intrinsic::riscv_vlse:
2145 case Intrinsic::riscv_vlse_mask:
2146 case Intrinsic::riscv_vloxei:
2147 case Intrinsic::riscv_vloxei_mask:
2148 case Intrinsic::riscv_vluxei:
2149 case Intrinsic::riscv_vluxei_mask:
2150 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2151 /*IsStore*/ false,
2152 /*IsUnitStrided*/ false);
2153 return;
2154 case Intrinsic::riscv_vsse:
2155 case Intrinsic::riscv_vsse_mask:
2156 case Intrinsic::riscv_vsoxei:
2157 case Intrinsic::riscv_vsoxei_mask:
2158 case Intrinsic::riscv_vsuxei:
2159 case Intrinsic::riscv_vsuxei_mask:
2160 SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2161 /*IsStore*/ true,
2162 /*IsUnitStrided*/ false);
2163 return;
2164 case Intrinsic::riscv_vlseg2:
2165 case Intrinsic::riscv_vlseg3:
2166 case Intrinsic::riscv_vlseg4:
2167 case Intrinsic::riscv_vlseg5:
2168 case Intrinsic::riscv_vlseg6:
2169 case Intrinsic::riscv_vlseg7:
2170 case Intrinsic::riscv_vlseg8:
2171 case Intrinsic::riscv_vlseg2ff:
2172 case Intrinsic::riscv_vlseg3ff:
2173 case Intrinsic::riscv_vlseg4ff:
2174 case Intrinsic::riscv_vlseg5ff:
2175 case Intrinsic::riscv_vlseg6ff:
2176 case Intrinsic::riscv_vlseg7ff:
2177 case Intrinsic::riscv_vlseg8ff:
2178 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2179 /*IsStore*/ false,
2180 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2181 return;
2182 case Intrinsic::riscv_vlseg2_mask:
2183 case Intrinsic::riscv_vlseg3_mask:
2184 case Intrinsic::riscv_vlseg4_mask:
2185 case Intrinsic::riscv_vlseg5_mask:
2186 case Intrinsic::riscv_vlseg6_mask:
2187 case Intrinsic::riscv_vlseg7_mask:
2188 case Intrinsic::riscv_vlseg8_mask:
2189 case Intrinsic::riscv_vlseg2ff_mask:
2190 case Intrinsic::riscv_vlseg3ff_mask:
2191 case Intrinsic::riscv_vlseg4ff_mask:
2192 case Intrinsic::riscv_vlseg5ff_mask:
2193 case Intrinsic::riscv_vlseg6ff_mask:
2194 case Intrinsic::riscv_vlseg7ff_mask:
2195 case Intrinsic::riscv_vlseg8ff_mask:
2196 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2197 /*IsStore*/ false,
2198 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2199 return;
2200 case Intrinsic::riscv_vlsseg2:
2201 case Intrinsic::riscv_vlsseg3:
2202 case Intrinsic::riscv_vlsseg4:
2203 case Intrinsic::riscv_vlsseg5:
2204 case Intrinsic::riscv_vlsseg6:
2205 case Intrinsic::riscv_vlsseg7:
2206 case Intrinsic::riscv_vlsseg8:
2207 case Intrinsic::riscv_vloxseg2:
2208 case Intrinsic::riscv_vloxseg3:
2209 case Intrinsic::riscv_vloxseg4:
2210 case Intrinsic::riscv_vloxseg5:
2211 case Intrinsic::riscv_vloxseg6:
2212 case Intrinsic::riscv_vloxseg7:
2213 case Intrinsic::riscv_vloxseg8:
2214 case Intrinsic::riscv_vluxseg2:
2215 case Intrinsic::riscv_vluxseg3:
2216 case Intrinsic::riscv_vluxseg4:
2217 case Intrinsic::riscv_vluxseg5:
2218 case Intrinsic::riscv_vluxseg6:
2219 case Intrinsic::riscv_vluxseg7:
2220 case Intrinsic::riscv_vluxseg8:
2221 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2222 /*IsStore*/ false,
2223 /*IsUnitStrided*/ false);
2224 return;
2225 case Intrinsic::riscv_vlsseg2_mask:
2226 case Intrinsic::riscv_vlsseg3_mask:
2227 case Intrinsic::riscv_vlsseg4_mask:
2228 case Intrinsic::riscv_vlsseg5_mask:
2229 case Intrinsic::riscv_vlsseg6_mask:
2230 case Intrinsic::riscv_vlsseg7_mask:
2231 case Intrinsic::riscv_vlsseg8_mask:
2232 case Intrinsic::riscv_vloxseg2_mask:
2233 case Intrinsic::riscv_vloxseg3_mask:
2234 case Intrinsic::riscv_vloxseg4_mask:
2235 case Intrinsic::riscv_vloxseg5_mask:
2236 case Intrinsic::riscv_vloxseg6_mask:
2237 case Intrinsic::riscv_vloxseg7_mask:
2238 case Intrinsic::riscv_vloxseg8_mask:
2239 case Intrinsic::riscv_vluxseg2_mask:
2240 case Intrinsic::riscv_vluxseg3_mask:
2241 case Intrinsic::riscv_vluxseg4_mask:
2242 case Intrinsic::riscv_vluxseg5_mask:
2243 case Intrinsic::riscv_vluxseg6_mask:
2244 case Intrinsic::riscv_vluxseg7_mask:
2245 case Intrinsic::riscv_vluxseg8_mask:
2246 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
2247 /*IsStore*/ false,
2248 /*IsUnitStrided*/ false);
2249 return;
2250 case Intrinsic::riscv_vsseg2:
2251 case Intrinsic::riscv_vsseg3:
2252 case Intrinsic::riscv_vsseg4:
2253 case Intrinsic::riscv_vsseg5:
2254 case Intrinsic::riscv_vsseg6:
2255 case Intrinsic::riscv_vsseg7:
2256 case Intrinsic::riscv_vsseg8:
2257 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2258 /*IsStore*/ true,
2259 /*IsUnitStrided*/ false);
2260 return;
2261 case Intrinsic::riscv_vsseg2_mask:
2262 case Intrinsic::riscv_vsseg3_mask:
2263 case Intrinsic::riscv_vsseg4_mask:
2264 case Intrinsic::riscv_vsseg5_mask:
2265 case Intrinsic::riscv_vsseg6_mask:
2266 case Intrinsic::riscv_vsseg7_mask:
2267 case Intrinsic::riscv_vsseg8_mask:
2268 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2269 /*IsStore*/ true,
2270 /*IsUnitStrided*/ false);
2271 return;
2272 case Intrinsic::riscv_vssseg2:
2273 case Intrinsic::riscv_vssseg3:
2274 case Intrinsic::riscv_vssseg4:
2275 case Intrinsic::riscv_vssseg5:
2276 case Intrinsic::riscv_vssseg6:
2277 case Intrinsic::riscv_vssseg7:
2278 case Intrinsic::riscv_vssseg8:
2279 case Intrinsic::riscv_vsoxseg2:
2280 case Intrinsic::riscv_vsoxseg3:
2281 case Intrinsic::riscv_vsoxseg4:
2282 case Intrinsic::riscv_vsoxseg5:
2283 case Intrinsic::riscv_vsoxseg6:
2284 case Intrinsic::riscv_vsoxseg7:
2285 case Intrinsic::riscv_vsoxseg8:
2286 case Intrinsic::riscv_vsuxseg2:
2287 case Intrinsic::riscv_vsuxseg3:
2288 case Intrinsic::riscv_vsuxseg4:
2289 case Intrinsic::riscv_vsuxseg5:
2290 case Intrinsic::riscv_vsuxseg6:
2291 case Intrinsic::riscv_vsuxseg7:
2292 case Intrinsic::riscv_vsuxseg8:
2293 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2294 /*IsStore*/ true,
2295 /*IsUnitStrided*/ false);
2296 return;
2297 case Intrinsic::riscv_vssseg2_mask:
2298 case Intrinsic::riscv_vssseg3_mask:
2299 case Intrinsic::riscv_vssseg4_mask:
2300 case Intrinsic::riscv_vssseg5_mask:
2301 case Intrinsic::riscv_vssseg6_mask:
2302 case Intrinsic::riscv_vssseg7_mask:
2303 case Intrinsic::riscv_vssseg8_mask:
2304 case Intrinsic::riscv_vsoxseg2_mask:
2305 case Intrinsic::riscv_vsoxseg3_mask:
2306 case Intrinsic::riscv_vsoxseg4_mask:
2307 case Intrinsic::riscv_vsoxseg5_mask:
2308 case Intrinsic::riscv_vsoxseg6_mask:
2309 case Intrinsic::riscv_vsoxseg7_mask:
2310 case Intrinsic::riscv_vsoxseg8_mask:
2311 case Intrinsic::riscv_vsuxseg2_mask:
2312 case Intrinsic::riscv_vsuxseg3_mask:
2313 case Intrinsic::riscv_vsuxseg4_mask:
2314 case Intrinsic::riscv_vsuxseg5_mask:
2315 case Intrinsic::riscv_vsuxseg6_mask:
2316 case Intrinsic::riscv_vsuxseg7_mask:
2317 case Intrinsic::riscv_vsuxseg8_mask:
2318 SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2319 /*IsStore*/ true,
2320 /*IsUnitStrided*/ false);
2321 return;
2322 case Intrinsic::riscv_sf_vlte8:
2323 case Intrinsic::riscv_sf_vlte16:
2324 case Intrinsic::riscv_sf_vlte32:
2325 case Intrinsic::riscv_sf_vlte64:
2326 Info.opc = ISD::INTRINSIC_VOID;
2327 Info.ptrVal = I.getArgOperand(1);
2328 switch (Intrinsic) {
2329 case Intrinsic::riscv_sf_vlte8:
2330 Info.memVT = MVT::i8;
2331 Info.align = Align(1);
2332 break;
2333 case Intrinsic::riscv_sf_vlte16:
2334 Info.memVT = MVT::i16;
2335 Info.align = Align(2);
2336 break;
2337 case Intrinsic::riscv_sf_vlte32:
2338 Info.memVT = MVT::i32;
2339 Info.align = Align(4);
2340 break;
2341 case Intrinsic::riscv_sf_vlte64:
2342 Info.memVT = MVT::i64;
2343 Info.align = Align(8);
2344 break;
2345 }
2346 Info.size = MemoryLocation::UnknownSize;
2347 Info.flags |= MachineMemOperand::MOLoad;
2348 Infos.push_back(Info);
2349 return;
2350 case Intrinsic::riscv_sf_vste8:
2351 case Intrinsic::riscv_sf_vste16:
2352 case Intrinsic::riscv_sf_vste32:
2353 case Intrinsic::riscv_sf_vste64:
2354 Info.opc = ISD::INTRINSIC_VOID;
2355 Info.ptrVal = I.getArgOperand(1);
2356 switch (Intrinsic) {
2357 case Intrinsic::riscv_sf_vste8:
2358 Info.memVT = MVT::i8;
2359 Info.align = Align(1);
2360 break;
2361 case Intrinsic::riscv_sf_vste16:
2362 Info.memVT = MVT::i16;
2363 Info.align = Align(2);
2364 break;
2365 case Intrinsic::riscv_sf_vste32:
2366 Info.memVT = MVT::i32;
2367 Info.align = Align(4);
2368 break;
2369 case Intrinsic::riscv_sf_vste64:
2370 Info.memVT = MVT::i64;
2371 Info.align = Align(8);
2372 break;
2373 }
2374 Info.size = MemoryLocation::UnknownSize;
2375 Info.flags |= MachineMemOperand::MOStore;
2376 Infos.push_back(Info);
2377 return;
2378 }
2379}
2380
2382 const AddrMode &AM, Type *Ty,
2383 unsigned AS,
2384 Instruction *I) const {
2385 // No global is ever allowed as a base.
2386 if (AM.BaseGV)
2387 return false;
2388
2389 // None of our addressing modes allows a scalable offset
2390 if (AM.ScalableOffset)
2391 return false;
2392
2393 // RVV instructions only support register addressing.
2394 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2395 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2396
2397 // Require a 12-bit signed offset.
2398 if (!isInt<12>(AM.BaseOffs))
2399 return false;
2400
2401 switch (AM.Scale) {
2402 case 0: // "r+i" or just "i", depending on HasBaseReg.
2403 break;
2404 case 1:
2405 if (!AM.HasBaseReg) // allow "r+i".
2406 break;
2407 return false; // disallow "r+r" or "r+r+i".
2408 default:
2409 return false;
2410 }
2411
2412 return true;
2413}
2414
2416 return isInt<12>(Imm);
2417}
2418
2420 return isInt<12>(Imm);
2421}
2422
2423// On RV32, 64-bit integers are split into their high and low parts and held
2424// in two different registers, so the trunc is free since the low register can
2425// just be used.
2426// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2427// isTruncateFree?
2429 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2430 return false;
2431 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2432 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2433 return (SrcBits == 64 && DestBits == 32);
2434}
2435
2437 // We consider i64->i32 free on RV64 since we have good selection of W
2438 // instructions that make promoting operations back to i64 free in many cases.
2439 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2440 !DstVT.isInteger())
2441 return false;
2442 unsigned SrcBits = SrcVT.getSizeInBits();
2443 unsigned DestBits = DstVT.getSizeInBits();
2444 return (SrcBits == 64 && DestBits == 32);
2445}
2446
2448 EVT SrcVT = Val.getValueType();
2449 // free truncate from vnsrl and vnsra
2450 if (Subtarget.hasVInstructions() &&
2451 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2452 SrcVT.isVector() && VT2.isVector()) {
2453 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2454 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2455 if (SrcBits == DestBits * 2) {
2456 return true;
2457 }
2458 }
2459 return TargetLowering::isTruncateFree(Val, VT2);
2460}
2461
2463 // Zexts are free if they can be combined with a load.
2464 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2465 // poorly with type legalization of compares preferring sext.
2466 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2467 EVT MemVT = LD->getMemoryVT();
2468 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2469 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2470 LD->getExtensionType() == ISD::ZEXTLOAD))
2471 return true;
2472 }
2473
2474 return TargetLowering::isZExtFree(Val, VT2);
2475}
2476
2478 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2479}
2480
2482 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2483}
2484
2486 return Subtarget.hasCTZLike();
2487}
2488
2490 return Subtarget.hasCLZLike();
2491}
2492
// Sink an AND-with-constant mask next to its compare-with-0 user when the
// mask can be matched as a single BEXTI-like bit extract.
2494 const Instruction &AndI) const {
2495 // We expect to be able to match a bit extraction instruction if the Zbs
2496 // extension is supported and the mask is a power of two. However, we
2497 // conservatively return false if the mask would fit in an ANDI instruction,
2498 // on the basis that it's possible the sinking+duplication of the AND in
2499 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2500 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2501 if (!Subtarget.hasBEXTILike())
2502 return false;
// NOTE(review): orig. line 2503 (the dyn_cast initializing `Mask` from the
// AND's constant operand) is missing from this extraction.
2504 if (!Mask)
2505 return false;
// Power-of-two mask that does NOT fit a 12-bit ANDI immediate => BEXTI wins.
2506 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2507}
2508
// hasAndNotCompare: scalar ANDN is available with Zbb/Zbkb, but only useful
// when Y is not a (foldable) constant.
2510 EVT VT = Y.getValueType();
2511
2512 if (VT.isVector())
2513 return false;
2514
2515 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2516 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2517}
2518
// hasAndNot: scalars defer to hasAndNotCompare; vectors need Zvkb (vandn).
2520 EVT VT = Y.getValueType();
2521
2522 if (!VT.isVector())
2523 return hasAndNotCompare(Y);
2524
2525 return Subtarget.hasStdExtZvkb();
2526}
2527
// hasBitTest: report whether a single-bit test (X & (1 << Y)) != 0 can be
// selected cheaply for the given operands.
2529 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2530 if (Subtarget.hasStdExtZbs())
2531 return X.getValueType().isScalarInteger();
2532 auto *C = dyn_cast<ConstantSDNode>(Y);
2533 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2534 if (Subtarget.hasVendorXTHeadBs())
2535 return C != nullptr;
2536 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2537 return C && C->getAPIntValue().ule(10);
2538}
2539
// Fold select-with-identity-constant into a masked op, but only for RVV
// VSELECTs on legal (or scalable) vector types.
2541 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2542 SDValue Y) const {
2543 if (SelectOpcode != ISD::VSELECT)
2544 return false;
2545
2546 // Only enable for rvv.
2547 if (!VT.isVector() || !Subtarget.hasVInstructions())
2548 return false;
2549
2550 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2551 return false;
2552
2553 return true;
2554}
2555
// Decide whether a load of a constant should be replaced by materializing the
// integer immediate directly.
2557 Type *Ty) const {
2558 assert(Ty->isIntegerTy());
2559
2560 unsigned BitSize = Ty->getIntegerBitWidth();
2561 if (BitSize > Subtarget.getXLen())
2562 return false;
2563
2564 // Fast path, assume 32-bit immediates are cheap.
2565 int64_t Val = Imm.getSExtValue();
2566 if (isInt<32>(Val))
2567 return true;
2568
2569 // A constant pool entry may be more aligned than the load we're trying to
2570 // replace. If we don't support unaligned scalar mem, prefer the constant
2571 // pool.
2572 // TODO: Can the caller pass down the alignment?
2573 if (!Subtarget.enableUnalignedScalarMem())
2574 return true;
2575
2576 // Prefer to keep the load if it would require many instructions.
2577 // This uses the same threshold we use for constant pools but doesn't
2578 // check useConstantPoolForLargeInts.
2579 // TODO: Should we keep the load only when we're definitely going to emit a
2580 // constant pool?
2581
// NOTE(review): orig. line 2582 (building `Seq` via RISCVMatInt::generateInstSeq)
// is missing from this extraction.
2583 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2584}
2585
// TargetLowering hook controlling the (X >> Y) & C <-> (X & (C << Y)) >> Y
// rewrite; kept/forced so the 'bit extract' ((1 >> Y) & 1) pattern survives.
// NOTE(review): leading signature lines (orig. 2586-2588) are missing from
// this extraction.
2589 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2590 SelectionDAG &DAG) const {
2591 // One interesting pattern that we'd want to form is 'bit extract':
2592 // ((1 >> Y) & 1) ==/!= 0
2593 // But we also need to be careful not to try to reverse that fold.
2594
2595 // Is this '((1 >> Y) & 1)'?
2596 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2597 return false; // Keep the 'bit extract' pattern.
2598
2599 // Will this be '((1 >> Y) & 1)' after the transform?
2600 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2601 return true; // Do form the 'bit extract' pattern.
2602
2603 // If 'X' is a constant, and we transform, then we will immediately
2604 // try to undo the fold, thus causing endless combine loop.
2605 // So only do the transform if X is not a constant. This matches the default
2606 // implementation of this function.
2607 return !XC;
2608}
2609
// shouldScalarizeBinop: allow scalarizing a vector binop when the vector form
// is unsupported or the scalar form is at least custom-lowered.
2611 unsigned Opc = VecOp.getOpcode();
2612
2613 // Assume target opcodes can't be scalarized.
2614 // TODO - do we have any exceptions?
2615 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2616 return false;
2617
2618 // If the vector op is not supported, try to convert to scalar.
2619 EVT VecVT = VecOp.getValueType();
// NOTE(review): orig. line 2620 (the legality check guarding this `return
// true`, presumably !isOperationLegalOrCustom(Opc, VecVT)) is missing from
// this extraction.
2621 return true;
2622
2623 // If the vector op is supported, but the scalar op is not, the transform may
2624 // not be worthwhile.
2625 // Permit a vector binary operation can be converted to scalar binary
2626 // operation which is custom lowered with illegal type.
2627 EVT ScalarVT = VecVT.getScalarType();
2628 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2629 isOperationCustom(Opc, ScalarVT);
2630}
2631
// isOffsetFoldingLegal: never fold offsets into global address nodes.
2633 const GlobalAddressSDNode *GA) const {
2634 // In order to maximise the opportunity for common subexpression elimination,
2635 // keep a separate ADD node for the global address offset instead of folding
2636 // it in the global address node. Later peephole optimisations may choose to
2637 // fold it back in when profitable.
2638 return false;
2639}
2640
2641// Returns 0-31 if the fli instruction is available for the type and this is
2642// legal FP immediate for the type. Returns -1 otherwise.
2644 if (!Subtarget.hasStdExtZfa())
2645 return -1;
2646
2647 bool IsSupportedVT = false;
2648 if (VT == MVT::f16) {
// f16 FLI needs Zfh or Zvfh on top of Zfa.
2649 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2650 } else if (VT == MVT::f32) {
2651 IsSupportedVT = true;
2652 } else if (VT == MVT::f64) {
2653 assert(Subtarget.hasStdExtD() && "Expect D extension");
2654 IsSupportedVT = true;
2655 }
2656
2657 if (!IsSupportedVT)
2658 return -1;
2659
// Index into the Zfa FLI immediate table, or -1 if Imm is not in the table.
2660 return RISCVLoadFPImm::getLoadFPImm(Imm);
2661}
2662
// isFPImmLegal: an FP immediate is "legal" if it can be produced cheaply —
// via fli (Zfa), fli+fneg, fmv of +/-0.0, or a short integer-materialization
// sequence followed by an fmv.
2664 bool ForCodeSize) const {
2665 bool IsLegalVT = false;
2666 if (VT == MVT::f16)
2667 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2668 else if (VT == MVT::f32)
2669 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2670 else if (VT == MVT::f64)
2671 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2672 else if (VT == MVT::bf16)
2673 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2674
2675 if (!IsLegalVT)
2676 return false;
2677
2678 if (getLegalZfaFPImm(Imm, VT) >= 0)
2679 return true;
2680
2681 // Some constants can be produced by fli+fneg.
2682 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2683 return true;
2684
2685 // Cannot create a 64 bit floating-point immediate value for rv32.
2686 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2687 // td can handle +0.0 or -0.0 already.
2688 // -0.0 can be created by fmv + fneg.
2689 return Imm.isZero();
2690 }
2691
2692 // Special case: fmv + fneg
2693 if (Imm.isNegZero())
2694 return true;
2695
2696 // Building an integer and then converting requires a fmv at the end of
2697 // the integer sequence. The fmv is not required for Zfinx.
2698 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2699 const int Cost =
2700 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2701 Subtarget.getXLen(), Subtarget);
// FPImmCost is the cl::opt threshold (default 3) declared at file scope.
2702 return Cost <= FPImmCost;
2703}
2704
2705// TODO: This is very conservative.
2707 unsigned Index) const {
// NOTE(review): orig. line 2708 (an early legality check guarding this
// `return false`) is missing from this extraction.
2709 return false;
2710
2711 // Extracts from index 0 are just subreg extracts.
2712 if (Index == 0)
2713 return true;
2714
2715 // Only support extracting a fixed from a fixed vector for now.
2716 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2717 return false;
2718
2719 EVT EltVT = ResVT.getVectorElementType();
2720 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2721
2722 // The smallest type we can slide is i8.
2723 if (EltVT == MVT::i1)
2724 return false;
2725
2726 unsigned ResElts = ResVT.getVectorNumElements();
2727 unsigned SrcElts = SrcVT.getVectorNumElements();
2728
2729 unsigned MinVLen = Subtarget.getRealMinVLen();
2730 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2731
2732 // If we're extracting only data from the first VLEN bits of the source
2733 // then we can always do this with an m1 vslidedown.vx. Restricting the
2734 // Index ensures we can use a vslidedown.vi.
2735 // TODO: We can generalize this when the exact VLEN is known.
2736 if (Index + ResElts <= MinVLMAX && Index < 31)
2737 return true;
2738
2739 // Convervatively only handle extracting half of a vector.
2740 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2741 // the upper half of a vector until we have more test coverage.
2742 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2743 // a cheap extract. However, this case is important in practice for
2744 // shuffled extracts of longer vectors. How resolve?
2745 return (ResElts * 2) == SrcElts && Index == ResElts;
2746}
2747
// getRegisterTypeForCallingConv: pass f16 as f32 when F/Zfinx exists but the
// half-precision min extensions do not.
2749 CallingConv::ID CC,
2750 EVT VT) const {
2751 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2752 // We might still end up using a GPR but that will be decided based on ABI.
2753 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2754 !Subtarget.hasStdExtZfhminOrZhinxmin())
2755 return MVT::f32;
2756
2757 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2758}
2759
2760unsigned
// NOTE(review): the parameter-list opening line (orig. 2761) is missing from
// this extraction.
2762 std::optional<MVT> RegisterVT) const {
2763 // Pair inline assembly operand
// A GPR-pair (2*XLEN) inline-asm operand with an Untyped register class
// counts as a single register.
2764 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2765 *RegisterVT == MVT::Untyped)
2766 return 1;
2767
2768 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2769}
2770
// getNumRegistersForCallingConv: mirrors getRegisterTypeForCallingConv —
// f16 promoted to f32 still occupies one register.
2772 CallingConv::ID CC,
2773 EVT VT) const {
2774 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2775 // We might still end up using a GPR but that will be decided based on ABI.
2776 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2777 !Subtarget.hasStdExtZfhminOrZhinxmin())
2778 return 1;
2779
2780 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2781}
2782
2783// Changes the condition code and swaps operands if necessary, so the SetCC
2784// operation matches one of the comparisons supported directly by branches
2785// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2786// with 1/-1.
// NOTE(review): the function's first signature line (orig. 2787) is missing
// from this extraction.
2788 ISD::CondCode &CC, SelectionDAG &DAG,
2789 const RISCVSubtarget &Subtarget) {
2790 // If this is a single bit test that can't be handled by ANDI, shift the
2791 // bit to be tested to the MSB and perform a signed compare with 0.
2792 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2793 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2794 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2795 // XAndesPerf supports branch on test bit.
2796 !Subtarget.hasVendorXAndesPerf()) {
2797 uint64_t Mask = LHS.getConstantOperandVal(1);
2798 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2799 unsigned ShAmt = 0;
2800 if (isPowerOf2_64(Mask)) {
// Single-bit mask: move the bit to the sign position and test the sign.
2801 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2802 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2803 } else {
// Contiguous low-bit mask: shift out the bits above it, compare with 0.
2804 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2805 }
2806
2807 LHS = LHS.getOperand(0);
2808 if (ShAmt != 0)
2809 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2810 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2811 return;
2812 }
2813 }
2814
2815 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2816 int64_t C = RHSC->getSExtValue();
2817 switch (CC) {
2818 default: break;
2819 case ISD::SETGT:
2820 // Convert X > -1 to X >= 0.
2821 if (C == -1) {
2822 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2823 CC = ISD::SETGE;
2824 return;
2825 }
2826 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2827 C != INT64_MAX && isInt<5>(C + 1)) {
2828 // We have a conditional move instruction for SETGE but not SETGT.
2829 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2830 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2831 CC = ISD::SETGE;
2832 return;
2833 }
2834 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2835 // We have a branch immediate instruction for SETGE but not SETGT.
2836 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2837 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2838 CC = ISD::SETGE;
2839 return;
2840 }
2841 break;
2842 case ISD::SETLT:
2843 // Convert X < 1 to 0 >= X.
2844 if (C == 1) {
2845 RHS = LHS;
2846 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2847 CC = ISD::SETGE;
2848 return;
2849 }
2850 break;
2851 case ISD::SETUGT:
2852 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2853 C != INT64_MAX && isUInt<5>(C + 1)) {
2854 // We have a conditional move instruction for SETUGE but not SETUGT.
2855 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2856 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2857 CC = ISD::SETUGE;
2858 return;
2859 }
2860 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2861 // We have a branch immediate instruction for SETUGE but not SETUGT.
2862 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2863 // immediate.
2864 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2865 CC = ISD::SETUGE;
2866 return;
2867 }
2868 break;
2869 }
2870 }
2871
// Remaining unsupported predicates are handled by swapping the operands.
2872 switch (CC) {
2873 default:
2874 break;
2875 case ISD::SETGT:
2876 case ISD::SETLE:
2877 case ISD::SETUGT:
2878 case ISD::SETULE:
// NOTE(review): orig. line 2879 is missing here; presumably it replaced CC
// with the swapped-operand condition code — confirm against upstream.
2880 std::swap(LHS, RHS);
2881 break;
2882 }
2883}
2884
// getLMUL: map a scalable vector or RVV tuple type to its register-group
// multiplier. NOTE(review): the signature line (orig. 2885) is missing from
// this extraction.
2886 if (VT.isRISCVVectorTuple()) {
2887 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2888 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2889 return RISCVVType::LMUL_F8;
2890 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2891 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2892 return RISCVVType::LMUL_F4;
2893 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2894 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2895 return RISCVVType::LMUL_F2;
2896 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2897 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2898 return RISCVVType::LMUL_1;
2899 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2900 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2901 return RISCVVType::LMUL_2;
2902 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2903 return RISCVVType::LMUL_4;
2904 llvm_unreachable("Invalid vector tuple type LMUL.");
2905 }
2906
2907 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2908 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
// i1 vectors are measured as if each element were a byte so the switch below
// can share the bit-size buckets.
2909 if (VT.getVectorElementType() == MVT::i1)
2910 KnownSize *= 8;
2911
2912 switch (KnownSize) {
2913 default:
2914 llvm_unreachable("Invalid LMUL.");
2915 case 8:
2916 return RISCVVType::LMUL_F8;
2917 case 16:
2918 return RISCVVType::LMUL_F4;
2919 case 32:
2920 return RISCVVType::LMUL_F2;
2921 case 64:
2922 return RISCVVType::LMUL_1;
2923 case 128:
2924 return RISCVVType::LMUL_2;
2925 case 256:
2926 return RISCVVType::LMUL_4;
2927 case 512:
2928 return RISCVVType::LMUL_8;
2929 }
2930}
2931
// getRegClassIDForLMUL: map an LMUL value to the corresponding vector
// register class ID. NOTE(review): the signature line (orig. 2932) and the
// fractional-LMUL case labels (orig. 2936-2938) are missing from this
// extraction; fractional LMULs presumably share the VR class — confirm.
2933 switch (LMul) {
2934 default:
2935 llvm_unreachable("Invalid LMUL.");
2939 case RISCVVType::LMUL_1:
2940 return RISCV::VRRegClassID;
2941 case RISCVVType::LMUL_2:
2942 return RISCV::VRM2RegClassID;
2943 case RISCVVType::LMUL_4:
2944 return RISCV::VRM4RegClassID;
2945 case RISCVVType::LMUL_8:
2946 return RISCV::VRM8RegClassID;
2947 }
2948}
2949
2950unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2951 RISCVVType::VLMUL LMUL = getLMUL(VT);
2952 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2953 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2954 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2955 "Unexpected subreg numbering");
2956 return RISCV::sub_vrm1_0 + Index;
2957 }
2958 if (LMUL == RISCVVType::LMUL_2) {
2959 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2960 "Unexpected subreg numbering");
2961 return RISCV::sub_vrm2_0 + Index;
2962 }
2963 if (LMUL == RISCVVType::LMUL_4) {
2964 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2965 "Unexpected subreg numbering");
2966 return RISCV::sub_vrm4_0 + Index;
2967 }
2968 llvm_unreachable("Invalid vector type.");
2969}
2970
// getRegClassIDForVecVT: pick the register class for a vector or RVV tuple
// type. NOTE(review): the signature line (orig. 2971) is missing from this
// extraction.
2972 if (VT.isRISCVVectorTuple()) {
2973 unsigned NF = VT.getRISCVVectorTupleNumFields();
// Registers per tuple field, clamped to at least one whole register.
2974 unsigned RegsPerField =
2975 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2976 (NF * RISCV::RVVBitsPerBlock));
2977 switch (RegsPerField) {
2978 case 1:
2979 if (NF == 2)
2980 return RISCV::VRN2M1RegClassID;
2981 if (NF == 3)
2982 return RISCV::VRN3M1RegClassID;
2983 if (NF == 4)
2984 return RISCV::VRN4M1RegClassID;
2985 if (NF == 5)
2986 return RISCV::VRN5M1RegClassID;
2987 if (NF == 6)
2988 return RISCV::VRN6M1RegClassID;
2989 if (NF == 7)
2990 return RISCV::VRN7M1RegClassID;
2991 if (NF == 8)
2992 return RISCV::VRN8M1RegClassID;
2993 break;
2994 case 2:
2995 if (NF == 2)
2996 return RISCV::VRN2M2RegClassID;
2997 if (NF == 3)
2998 return RISCV::VRN3M2RegClassID;
2999 if (NF == 4)
3000 return RISCV::VRN4M2RegClassID;
3001 break;
3002 case 4:
3003 assert(NF == 2);
3004 return RISCV::VRN2M4RegClassID;
3005 default:
3006 break;
3007 }
3008 llvm_unreachable("Invalid vector tuple type RegClass.");
3009 }
3010
// Mask vectors live in the plain VR class regardless of LMUL.
3011 if (VT.getVectorElementType() == MVT::i1)
3012 return RISCV::VRRegClassID;
3013 return getRegClassIDForLMUL(getLMUL(VT));
3014}
3015
3016// Attempt to decompose a subvector insert/extract between VecVT and
3017// SubVecVT via subregister indices. Returns the subregister index that
3018// can perform the subvector insert/extract with the given element index, as
3019// well as the index corresponding to any leftover subvectors that must be
3020// further inserted/extracted within the register class for SubVecVT.
3021std::pair<unsigned, unsigned>
// NOTE(review): the qualified function-name line (orig. 3022) is missing from
// this extraction.
3023 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
3024 const RISCVRegisterInfo *TRI) {
3025 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
3026 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
3027 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
3028 "Register classes not ordered");
3029 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
3030 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
3031
3032 // If VecVT is a vector tuple type, either it's the tuple type with same
3033 // RegClass with SubVecVT or SubVecVT is a actually a subvector of the VecVT.
3034 if (VecVT.isRISCVVectorTuple()) {
3035 if (VecRegClassID == SubRegClassID)
3036 return {RISCV::NoSubRegister, 0};
3037
3038 assert(SubVecVT.isScalableVector() &&
3039 "Only allow scalable vector subvector.");
3040 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
3041 "Invalid vector tuple insert/extract for vector and subvector with "
3042 "different LMUL.");
3043 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
3044 }
3045
3046 // Try to compose a subregister index that takes us from the incoming
3047 // LMUL>1 register class down to the outgoing one. At each step we half
3048 // the LMUL:
3049 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
3050 // Note that this is not guaranteed to find a subregister index, such as
3051 // when we are extracting from one VR type to another.
3052 unsigned SubRegIdx = RISCV::NoSubRegister;
3053 for (const unsigned RCID :
3054 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
3055 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
3056 VecVT = VecVT.getHalfNumVectorElementsVT();
// Decide which half of the (now halved) register group the index falls in.
3057 bool IsHi =
3058 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
3059 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
3060 getSubregIndexByMVT(VecVT, IsHi));
3061 if (IsHi)
3062 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
3063 }
3064 return {SubRegIdx, InsertExtractIdx};
3065}
3066
3067// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
3068// stores for those types.
3069bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
3070 return !Subtarget.useRVVForFixedLengthVectors() ||
3071 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
3072}
3073
// isLegalElementTypeForRVV: report whether a scalar type can be an RVV vector
// element on this subtarget. NOTE(review): the signature line (orig. 3074) is
// missing from this extraction.
3075 if (!ScalarTy.isSimple())
3076 return false;
3077 switch (ScalarTy.getSimpleVT().SimpleTy) {
3078 case MVT::iPTR:
// Pointers are XLEN-wide: on RV64 they need i64 vector support.
3079 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
3080 case MVT::i8:
3081 case MVT::i16:
3082 case MVT::i32:
3083 return Subtarget.hasVInstructions();
3084 case MVT::i64:
3085 return Subtarget.hasVInstructionsI64();
3086 case MVT::f16:
3087 return Subtarget.hasVInstructionsF16Minimal();
3088 case MVT::bf16:
3089 return Subtarget.hasVInstructionsBF16Minimal();
3090 case MVT::f32:
3091 return Subtarget.hasVInstructionsF32();
3092 case MVT::f64:
3093 return Subtarget.hasVInstructionsF64();
3094 default:
3095 return false;
3096 }
3097}
3098
3099
// Threshold for rewriting repeated FP divisions as multiplication by the
// reciprocal; value comes from the NumRepeatedDivisors cl::opt (default 2).
3101 return NumRepeatedDivisors;
3102}
3103
// getVLOperand: fetch the VL operand of an RVV intrinsic node, or an empty
// SDValue when the intrinsic has no VL-operand table entry.
3105 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3106 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3107 "Unexpected opcode");
3108 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3109 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
// NOTE(review): orig. line 3110 (the declaration of `II`, looked up in the
// RISCVVIntrinsicsTable) is missing from this extraction.
3111 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3112 if (!II)
3113 return SDValue();
// +1 skips the intrinsic-ID operand; +HasChain skips the chain if present.
3114 return Op.getOperand(II->VLOperand + 1 + HasChain);
3115}
3116
// File-local predicate: can this fixed-length vector type be lowered through
// RVV on the given subtarget? NOTE(review): the first signature line (orig.
// 3117) is missing from this extraction.
3118 const RISCVSubtarget &Subtarget) {
3119 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
3120 if (!Subtarget.useRVVForFixedLengthVectors())
3121 return false;
3122
3123 // We only support a set of vector types with a consistent maximum fixed size
3124 // across all supported vector element types to avoid legalization issues.
3125 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
3126 // fixed-length vector type we support is 1024 bytes.
3127 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
3128 return false;
3129
3130 unsigned MinVLen = Subtarget.getRealMinVLen();
3131
3132 MVT EltVT = VT.getVectorElementType();
3133
3134 // Don't use RVV for vectors we cannot scalarize if required.
3135 switch (EltVT.SimpleTy) {
3136 // i1 is supported but has different rules.
3137 default:
3138 return false;
3139 case MVT::i1:
3140 // Masks can only use a single register.
3141 if (VT.getVectorNumElements() > MinVLen)
3142 return false;
// Mask bits pack 8 per byte, so scale the VLEN budget used below.
3143 MinVLen /= 8;
3144 break;
3145 case MVT::i8:
3146 case MVT::i16:
3147 case MVT::i32:
3148 break;
3149 case MVT::i64:
3150 if (!Subtarget.hasVInstructionsI64())
3151 return false;
3152 break;
3153 case MVT::f16:
3154 if (!Subtarget.hasVInstructionsF16Minimal())
3155 return false;
3156 break;
3157 case MVT::bf16:
3158 if (!Subtarget.hasVInstructionsBF16Minimal())
3159 return false;
3160 break;
3161 case MVT::f32:
3162 if (!Subtarget.hasVInstructionsF32())
3163 return false;
3164 break;
3165 case MVT::f64:
3166 if (!Subtarget.hasVInstructionsF64())
3167 return false;
3168 break;
3169 }
3170
3171 // Reject elements larger than ELEN.
3172 if (EltVT.getSizeInBits() > Subtarget.getELen())
3173 return false;
3174
3175 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
3176 // Don't use RVV for types that don't fit.
3177 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
3178 return false;
3179
3180 // TODO: Perhaps an artificial restriction, but worth having whilst getting
3181 // the base fixed length RVV support in place.
3182 if (!VT.isPow2VectorType())
3183 return false;
3184
3185 return true;
3186}
3187
// Member-function shim over the file-local helper of the same name.
3188bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
3189 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
3190}
3191
3192// Return the largest legal scalable vector type that matches VT's element type.
// NOTE(review): the signature line (orig. 3193) is missing from this
// extraction.
3194 const RISCVSubtarget &Subtarget) {
3195 // This may be called before legal types are setup.
3196 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
3197 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
3198 "Expected legal fixed length vector!");
3199
3200 unsigned MinVLen = Subtarget.getRealMinVLen();
3201 unsigned MaxELen = Subtarget.getELen();
3202
3203 MVT EltVT = VT.getVectorElementType();
3204 switch (EltVT.SimpleTy) {
3205 default:
3206 llvm_unreachable("unexpected element type for RVV container");
3207 case MVT::i1:
3208 case MVT::i8:
3209 case MVT::i16:
3210 case MVT::i32:
3211 case MVT::i64:
3212 case MVT::bf16:
3213 case MVT::f16:
3214 case MVT::f32:
3215 case MVT::f64: {
3216 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
3217 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
3218 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
3219 unsigned NumElts =
// NOTE(review): orig. line 3220 (the initializer for NumElts, scaling the
// element count by RVVBitsPerBlock/MinVLen) is missing from this extraction.
3221 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
3222 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
3223 return MVT::getScalableVectorVT(EltVT, NumElts);
3224 }
3225 }
3226}
3227
// Convenience overload forwarding to the TLI-based container query.
// NOTE(review): the leading signature line (orig. 3228) and part of the
// forwarded call (orig. 3230) are missing from this extraction.
3229 const RISCVSubtarget &Subtarget) {
3231 Subtarget);
3232}
3233
// Member wrapper: delegate to the file-local helper using this lowering's
// subtarget. NOTE(review): signature line (orig. 3234) missing here.
3235 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
3236}
3237
3238// Grow V to consume an entire RVV register.
// NOTE(review): the first signature line (orig. 3239) is missing from this
// extraction.
3240 const RISCVSubtarget &Subtarget) {
3241 assert(VT.isScalableVector() &&
3242 "Expected to convert into a scalable vector!");
3243 assert(V.getValueType().isFixedLengthVector() &&
3244 "Expected a fixed length vector operand!");
3245 SDLoc DL(V);
// Insert the fixed vector at element 0 of an undef scalable container.
3246 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
3247}
3248
3249// Shrink V so it's just big enough to maintain a VT's worth of data.
// NOTE(review): the signature line (orig. 3250) and the first assert line
// (orig. 3252) are missing from this extraction.
3251 const RISCVSubtarget &Subtarget) {
3253 "Expected to convert into a fixed length vector!");
3254 assert(V.getValueType().isScalableVector() &&
3255 "Expected a scalable vector operand!");
3256 SDLoc DL(V);
// Extract the fixed-length prefix starting at element 0.
3257 return DAG.getExtractSubvector(DL, VT, V, 0);
3258}
3259
3260/// Return the type of the mask type suitable for masking the provided
3261/// vector type. This is simply an i1 element type vector of the same
3262/// (possibly scalable) length.
3263static MVT getMaskTypeFor(MVT VecVT) {
3264 assert(VecVT.isVector());
// NOTE(review): orig. line 3265 (the declaration of `EC` as VecVT's element
// count) is missing from this extraction.
3266 return MVT::getVectorVT(MVT::i1, EC);
3267}
3268
3269/// Creates an all ones mask suitable for masking a vector of type VecTy with
3270/// vector length VL. .
3271static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
3272 SelectionDAG &DAG) {
3273 MVT MaskVT = getMaskTypeFor(VecVT);
3274 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3275}
3276
// Default {mask, VL} operands for a scalable vector: an all-ones mask and
// VLMAX (represented by register X0 as the AVL).
3277static std::pair<SDValue, SDValue>
// NOTE(review): the parameter-list opening line (orig. 3278) is missing from
// this extraction.
3279 const RISCVSubtarget &Subtarget) {
3280 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
3281 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
3282 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
3283 return {Mask, VL};
3284}
3285
3286static std::pair<SDValue, SDValue>
3287getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
3288 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
3289 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3290 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
3291 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
3292 return {Mask, VL};
3293}
3294
3295// Gets the two common "VL" operands: an all-ones mask and the vector length.
3296// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
3297// the vector type that the fixed-length vector is contained in. Otherwise if
3298// VecVT is scalable, then ContainerVT should be the same as VecVT.
3299static std::pair<SDValue, SDValue>
3300getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
3301 const RISCVSubtarget &Subtarget) {
3302 if (VecVT.isFixedLengthVector())
3303 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
3304 Subtarget);
3305 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3306 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
3307}
3308
// computeVLMax: return the element count of a scalable vector as an XLenVT
// SDValue. NOTE(review): the first signature line (orig. 3309) is missing
// from this extraction.
3310 SelectionDAG &DAG) const {
3311 assert(VecVT.isScalableVector() && "Expected scalable vector");
3312 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
3313 VecVT.getVectorElementCount());
3314}
3315
// Compute the {min, max} VLMAX for VecVT from the subtarget's real VLEN
// bounds.
3316std::pair<unsigned, unsigned>
// NOTE(review): the qualified function-name line (orig. 3317) is missing from
// this extraction.
3318 const RISCVSubtarget &Subtarget) {
3319 assert(VecVT.isScalableVector() && "Expected scalable vector");
3320
3321 unsigned EltSize = VecVT.getScalarSizeInBits();
3322 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3323
3324 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3325 unsigned MaxVLMAX =
3326 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3327
3328 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3329 unsigned MinVLMAX =
3330 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3331
3332 return std::make_pair(MinVLMAX, MaxVLMAX);
3333}
3334
3335// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3336// of either is (currently) supported. This can get us into an infinite loop
3337// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3338// as a ..., etc.
3339// Until either (or both) of these can reliably lower any node, reporting that
3340// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3341// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3342// which is not desirable.
// NOTE(review): signature line (orig. 3343) missing from this extraction.
3344 EVT VT, unsigned DefinedValues) const {
3345 return false;
3346}
3347
// getLMULCost: cost of a single RVV instruction on VT, scaled by the
// LMUL-to-DLEN ratio. NOTE(review): the signature line (orig. 3348), the
// early return for non-vectors (orig. 3352), and the decodeVLMUL call (orig.
// 3359) are missing from this extraction.
3349 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
3350 // implementation-defined.
3351 if (!VT.isVector())
3353 unsigned DLenFactor = Subtarget.getDLenFactor();
3354 unsigned Cost;
3355 if (VT.isScalableVector()) {
3356 unsigned LMul;
3357 bool Fractional;
3358 std::tie(LMul, Fractional) =
3360 if (Fractional)
// A fractional LMUL narrower than DLEN still costs at least one pass.
3361 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3362 else
3363 Cost = (LMul * DLenFactor);
3364 } else {
3365 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3366 }
3367 return Cost;
3368}
3369
3370
3371/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3372/// may be quadratic in the number of vreg implied by LMUL, and is assumed to
3373/// be by default. VRGatherCostModel reflects available options. Note that
3374/// operand (index and possibly mask) are handled separately.
3376 auto LMULCost = getLMULCost(VT);
3377 bool Log2CostModel =
3378 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3379 if (Log2CostModel && LMULCost.isValid()) {
3380 unsigned Log = Log2_64(LMULCost.getValue());
3381 if (Log > 0)
3382 return LMULCost * Log;
3383 }
3384 return LMULCost * LMULCost;
3385}
3386
3387/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3388/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3389/// or may track the vrgather.vv cost. It is implementation-dependent.
3393
3394/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3395/// for the type VT. (This does not cover the vslide1up or vslide1down
3396/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3397/// or may track the vrgather.vv cost. It is implementation-dependent.
3401
3402/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3403/// for the type VT. (This does not cover the vslide1up or vslide1down
3404/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3405/// or may track the vrgather.vv cost. It is implementation-dependent.
3409
// Promote f16 (without Zfh/Zhinx) and bf16 FP operations to f32, rounding the
// result back, preserving the strict-FP chain when present. NOTE(review):
// the function's signature line (orig. 3410) is missing from this extraction,
// so its exact name is unconfirmed.
3411 const RISCVSubtarget &Subtarget) {
3412 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3413 // bf16 conversions are always promoted to f32.
3414 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3415 Op.getValueType() == MVT::bf16) {
3416 bool IsStrict = Op->isStrictFPOpcode();
3417
3418 SDLoc DL(Op);
3419 if (IsStrict) {
// Strict variant: thread the chain through the f32 op and the FP_ROUND.
3420 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3421 {Op.getOperand(0), Op.getOperand(1)});
3422 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3423 {Op.getValueType(), MVT::Other},
3424 {Val.getValue(1), Val.getValue(0),
3425 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3426 }
3427 return DAG.getNode(
3428 ISD::FP_ROUND, DL, Op.getValueType(),
3429 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3430 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3431 }
3432
3433 // Other operations are legal.
3434 return Op;
3435}
3436
3438 const RISCVSubtarget &Subtarget) {
3439 // RISC-V FP-to-int conversions saturate to the destination register size, but
3440 // don't produce 0 for nan. We can use a conversion instruction and fix the
3441 // nan case with a compare and a select.
3442 SDValue Src = Op.getOperand(0);
3443
3444 MVT DstVT = Op.getSimpleValueType();
3445 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3446
3447 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3448
3449 if (!DstVT.isVector()) {
3450 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3451 // the result.
3452 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3453 Src.getValueType() == MVT::bf16) {
3454 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3455 }
3456
3457 unsigned Opc;
3458 if (SatVT == DstVT)
3459 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3460 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3461 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3462 else
3463 return SDValue();
3464 // FIXME: Support other SatVTs by clamping before or after the conversion.
3465
3466 SDLoc DL(Op);
3467 SDValue FpToInt = DAG.getNode(
3468 Opc, DL, DstVT, Src,
3470
3471 if (Opc == RISCVISD::FCVT_WU_RV64)
3472 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3473
3474 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3475 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3477 }
3478
3479 // Vectors.
3480
3481 MVT DstEltVT = DstVT.getVectorElementType();
3482 MVT SrcVT = Src.getSimpleValueType();
3483 MVT SrcEltVT = SrcVT.getVectorElementType();
3484 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3485 unsigned DstEltSize = DstEltVT.getSizeInBits();
3486
3487 // Only handle saturating to the destination type.
3488 if (SatVT != DstEltVT)
3489 return SDValue();
3490
3491 MVT DstContainerVT = DstVT;
3492 MVT SrcContainerVT = SrcVT;
3493 if (DstVT.isFixedLengthVector()) {
3494 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3495 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3496 assert(DstContainerVT.getVectorElementCount() ==
3497 SrcContainerVT.getVectorElementCount() &&
3498 "Expected same element count");
3499 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3500 }
3501
3502 SDLoc DL(Op);
3503
3504 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3505
3506 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3507 {Src, Src, DAG.getCondCode(ISD::SETNE),
3508 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3509
3510 // Need to widen by more than 1 step, promote the FP type, then do a widening
3511 // convert.
3512 if (DstEltSize > (2 * SrcEltSize)) {
3513 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3514 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3515 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3516 }
3517
3518 MVT CvtContainerVT = DstContainerVT;
3519 MVT CvtEltVT = DstEltVT;
3520 if (SrcEltSize > (2 * DstEltSize)) {
3521 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3522 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3523 }
3524
3525 unsigned RVVOpc =
3526 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3527 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3528
3529 while (CvtContainerVT != DstContainerVT) {
3530 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3531 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3532 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3533 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3534 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3535 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3536 }
3537
3538 SDValue SplatZero = DAG.getNode(
3539 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3540 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3541 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3542 Res, DAG.getUNDEF(DstContainerVT), VL);
3543
3544 if (DstVT.isFixedLengthVector())
3545 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3546
3547 return Res;
3548}
3549
3551 const RISCVSubtarget &Subtarget) {
3552 bool IsStrict = Op->isStrictFPOpcode();
3553 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3554
3555 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3556 // bf16 conversions are always promoted to f32.
3557 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3558 SrcVal.getValueType() == MVT::bf16) {
3559 SDLoc DL(Op);
3560 if (IsStrict) {
3561 SDValue Ext =
3562 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3563 {Op.getOperand(0), SrcVal});
3564 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3565 {Ext.getValue(1), Ext.getValue(0)});
3566 }
3567 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3568 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3569 }
3570
3571 // Other operations are legal.
3572 return Op;
3573}
3574
3576 switch (Opc) {
3577 case ISD::FROUNDEVEN:
3579 case ISD::VP_FROUNDEVEN:
3580 return RISCVFPRndMode::RNE;
3581 case ISD::FTRUNC:
3582 case ISD::STRICT_FTRUNC:
3583 case ISD::VP_FROUNDTOZERO:
3584 return RISCVFPRndMode::RTZ;
3585 case ISD::FFLOOR:
3586 case ISD::STRICT_FFLOOR:
3587 case ISD::VP_FFLOOR:
3588 return RISCVFPRndMode::RDN;
3589 case ISD::FCEIL:
3590 case ISD::STRICT_FCEIL:
3591 case ISD::VP_FCEIL:
3592 return RISCVFPRndMode::RUP;
3593 case ISD::FROUND:
3594 case ISD::LROUND:
3595 case ISD::LLROUND:
3596 case ISD::STRICT_FROUND:
3597 case ISD::STRICT_LROUND:
3599 case ISD::VP_FROUND:
3600 return RISCVFPRndMode::RMM;
3601 case ISD::FRINT:
3602 case ISD::LRINT:
3603 case ISD::LLRINT:
3604 case ISD::STRICT_FRINT:
3605 case ISD::STRICT_LRINT:
3606 case ISD::STRICT_LLRINT:
3607 case ISD::VP_FRINT:
3608 case ISD::VP_LRINT:
3609 case ISD::VP_LLRINT:
3610 return RISCVFPRndMode::DYN;
3611 }
3612
3614}
3615
3616// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3617// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3618// the integer domain and back. Taking care to avoid converting values that are
3619// nan or already correct.
3620static SDValue
3622 const RISCVSubtarget &Subtarget) {
3623 MVT VT = Op.getSimpleValueType();
3624 assert(VT.isVector() && "Unexpected type");
3625
3626 SDLoc DL(Op);
3627
3628 SDValue Src = Op.getOperand(0);
3629
3630 // Freeze the source since we are increasing the number of uses.
3631 Src = DAG.getFreeze(Src);
3632
3633 MVT ContainerVT = VT;
3634 if (VT.isFixedLengthVector()) {
3635 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3636 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3637 }
3638
3639 SDValue Mask, VL;
3640 if (Op->isVPOpcode()) {
3641 Mask = Op.getOperand(1);
3642 if (VT.isFixedLengthVector())
3643 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3644 Subtarget);
3645 VL = Op.getOperand(2);
3646 } else {
3647 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3648 }
3649
3650 // We do the conversion on the absolute value and fix the sign at the end.
3651 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3652
3653 // Determine the largest integer that can be represented exactly. This and
3654 // values larger than it don't have any fractional bits so don't need to
3655 // be converted.
3656 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3657 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3658 APFloat MaxVal = APFloat(FltSem);
3659 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3660 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3661 SDValue MaxValNode =
3662 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3663 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3664 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3665
3666 // If abs(Src) was larger than MaxVal or nan, keep it.
3667 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3668 Mask =
3669 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3670 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3671 Mask, Mask, VL});
3672
3673 // Truncate to integer and convert back to FP.
3674 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3675 MVT XLenVT = Subtarget.getXLenVT();
3676 SDValue Truncated;
3677
3678 switch (Op.getOpcode()) {
3679 default:
3680 llvm_unreachable("Unexpected opcode");
3681 case ISD::FRINT:
3682 case ISD::VP_FRINT:
3683 case ISD::FCEIL:
3684 case ISD::VP_FCEIL:
3685 case ISD::FFLOOR:
3686 case ISD::VP_FFLOOR:
3687 case ISD::FROUND:
3688 case ISD::FROUNDEVEN:
3689 case ISD::VP_FROUND:
3690 case ISD::VP_FROUNDEVEN:
3691 case ISD::VP_FROUNDTOZERO: {
3694 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3695 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3696 break;
3697 }
3698 case ISD::FTRUNC:
3699 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3700 Mask, VL);
3701 break;
3702 case ISD::FNEARBYINT:
3703 case ISD::VP_FNEARBYINT:
3704 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3705 Mask, VL);
3706 break;
3707 }
3708
3709 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3710 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3711 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3712 Mask, VL);
3713
3714 // Restore the original sign so that -0.0 is preserved.
3715 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3716 Src, Src, Mask, VL);
3717
3718 if (!VT.isFixedLengthVector())
3719 return Truncated;
3720
3721 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3722}
3723
3724// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3725// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3726// qNan and converting the new source to integer and back to FP.
3727static SDValue
3729 const RISCVSubtarget &Subtarget) {
3730 SDLoc DL(Op);
3731 MVT VT = Op.getSimpleValueType();
3732 SDValue Chain = Op.getOperand(0);
3733 SDValue Src = Op.getOperand(1);
3734
3735 MVT ContainerVT = VT;
3736 if (VT.isFixedLengthVector()) {
3737 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3738 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3739 }
3740
3741 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3742
3743 // Freeze the source since we are increasing the number of uses.
3744 Src = DAG.getFreeze(Src);
3745
3746 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3747 MVT MaskVT = Mask.getSimpleValueType();
3748 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3749 DAG.getVTList(MaskVT, MVT::Other),
3750 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3751 DAG.getUNDEF(MaskVT), Mask, VL});
3752 Chain = Unorder.getValue(1);
3753 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3754 DAG.getVTList(ContainerVT, MVT::Other),
3755 {Chain, Src, Src, Src, Unorder, VL});
3756 Chain = Src.getValue(1);
3757
3758 // We do the conversion on the absolute value and fix the sign at the end.
3759 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3760
3761 // Determine the largest integer that can be represented exactly. This and
3762 // values larger than it don't have any fractional bits so don't need to
3763 // be converted.
3764 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3765 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3766 APFloat MaxVal = APFloat(FltSem);
3767 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3768 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3769 SDValue MaxValNode =
3770 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3771 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3772 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3773
3774 // If abs(Src) was larger than MaxVal or nan, keep it.
3775 Mask = DAG.getNode(
3776 RISCVISD::SETCC_VL, DL, MaskVT,
3777 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3778
3779 // Truncate to integer and convert back to FP.
3780 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3781 MVT XLenVT = Subtarget.getXLenVT();
3782 SDValue Truncated;
3783
3784 switch (Op.getOpcode()) {
3785 default:
3786 llvm_unreachable("Unexpected opcode");
3787 case ISD::STRICT_FCEIL:
3788 case ISD::STRICT_FFLOOR:
3789 case ISD::STRICT_FROUND:
3793 Truncated = DAG.getNode(
3794 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3795 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3796 break;
3797 }
3798 case ISD::STRICT_FTRUNC:
3799 Truncated =
3800 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3801 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3802 break;
3804 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3805 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3806 Mask, VL);
3807 break;
3808 }
3809 Chain = Truncated.getValue(1);
3810
3811 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3812 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3813 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3814 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3815 Truncated, Mask, VL);
3816 Chain = Truncated.getValue(1);
3817 }
3818
3819 // Restore the original sign so that -0.0 is preserved.
3820 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3821 Src, Src, Mask, VL);
3822
3823 if (VT.isFixedLengthVector())
3824 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3825 return DAG.getMergeValues({Truncated, Chain}, DL);
3826}
3827
3828static SDValue
3830 const RISCVSubtarget &Subtarget) {
3831 MVT VT = Op.getSimpleValueType();
3832 if (VT.isVector())
3833 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3834
3835 if (DAG.shouldOptForSize())
3836 return SDValue();
3837
3838 SDLoc DL(Op);
3839 SDValue Src = Op.getOperand(0);
3840
3841 // Create an integer the size of the mantissa with the MSB set. This and all
3842 // values larger than it don't have any fractional bits so don't need to be
3843 // converted.
3844 const fltSemantics &FltSem = VT.getFltSemantics();
3845 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3846 APFloat MaxVal = APFloat(FltSem);
3847 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3848 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3849 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3850
3852 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3853 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3854}
3855
3856// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3858 const RISCVSubtarget &Subtarget) {
3859 SDLoc DL(Op);
3860 MVT DstVT = Op.getSimpleValueType();
3861 SDValue Src = Op.getOperand(0);
3862 MVT SrcVT = Src.getSimpleValueType();
3863 assert(SrcVT.isVector() && DstVT.isVector() &&
3864 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3865 "Unexpected type");
3866
3867 MVT DstContainerVT = DstVT;
3868 MVT SrcContainerVT = SrcVT;
3869
3870 if (DstVT.isFixedLengthVector()) {
3871 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3872 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3873 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3874 }
3875
3876 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3877
3878 // [b]f16 -> f32
3879 MVT SrcElemType = SrcVT.getVectorElementType();
3880 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3881 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3882 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3883 }
3884
3885 SDValue Res =
3886 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3887 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3888 Subtarget.getXLenVT()),
3889 VL);
3890
3891 if (!DstVT.isFixedLengthVector())
3892 return Res;
3893
3894 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3895}
3896
3897static SDValue
3899 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3900 SDValue Offset, SDValue Mask, SDValue VL,
3902 if (Passthru.isUndef())
3904 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3905 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3906 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3907}
3908
3909static SDValue
3910getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3911 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3912 SDValue VL,
3914 if (Passthru.isUndef())
3916 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3917 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3918 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3919}
3920
3924 int64_t Addend;
3925};
3926
3927static std::optional<APInt> getExactInteger(const APFloat &APF,
3929 // We will use a SINT_TO_FP to materialize this constant so we should use a
3930 // signed APSInt here.
3931 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3932 // We use an arbitrary rounding mode here. If a floating-point is an exact
3933 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3934 // the rounding mode changes the output value, then it is not an exact
3935 // integer.
3937 bool IsExact;
3938 // If it is out of signed integer range, it will return an invalid operation.
3939 // If it is not an exact integer, IsExact is false.
3940 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3942 !IsExact)
3943 return std::nullopt;
3944 return ValInt.extractBits(BitWidth, 0);
3945}
3946
3947// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3948// to the (non-zero) step S and start value X. This can be then lowered as the
3949// RVV sequence (VID * S) + X, for example.
3950// The step S is represented as an integer numerator divided by a positive
3951// denominator. Note that the implementation currently only identifies
3952// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3953// cannot detect 2/3, for example.
3954// Note that this method will also match potentially unappealing index
3955// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3956// determine whether this is worth generating code for.
3957//
3958// EltSizeInBits is the size of the type that the sequence will be calculated
3959// in, i.e. SEW for build_vectors or XLEN for address calculations.
3960static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3961 unsigned EltSizeInBits) {
3962 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3964 return std::nullopt;
3965 bool IsInteger = Op.getValueType().isInteger();
3966
3967 std::optional<unsigned> SeqStepDenom;
3968 std::optional<APInt> SeqStepNum;
3969 std::optional<APInt> SeqAddend;
3970 std::optional<std::pair<APInt, unsigned>> PrevElt;
3971 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3972
3973 // First extract the ops into a list of constant integer values. This may not
3974 // be possible for floats if they're not all representable as integers.
3975 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3976 const unsigned OpSize = Op.getScalarValueSizeInBits();
3977 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3978 if (Elt.isUndef()) {
3979 Elts[Idx] = std::nullopt;
3980 continue;
3981 }
3982 if (IsInteger) {
3983 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3984 } else {
3985 auto ExactInteger =
3986 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3987 if (!ExactInteger)
3988 return std::nullopt;
3989 Elts[Idx] = *ExactInteger;
3990 }
3991 }
3992
3993 for (auto [Idx, Elt] : enumerate(Elts)) {
3994 // Assume undef elements match the sequence; we just have to be careful
3995 // when interpolating across them.
3996 if (!Elt)
3997 continue;
3998
3999 if (PrevElt) {
4000 // Calculate the step since the last non-undef element, and ensure
4001 // it's consistent across the entire sequence.
4002 unsigned IdxDiff = Idx - PrevElt->second;
4003 APInt ValDiff = *Elt - PrevElt->first;
4004
4005 // A zero-value value difference means that we're somewhere in the middle
4006 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
4007 // step change before evaluating the sequence.
4008 if (ValDiff == 0)
4009 continue;
4010
4011 int64_t Remainder = ValDiff.srem(IdxDiff);
4012 // Normalize the step if it's greater than 1.
4013 if (Remainder != ValDiff.getSExtValue()) {
4014 // The difference must cleanly divide the element span.
4015 if (Remainder != 0)
4016 return std::nullopt;
4017 ValDiff = ValDiff.sdiv(IdxDiff);
4018 IdxDiff = 1;
4019 }
4020
4021 if (!SeqStepNum)
4022 SeqStepNum = ValDiff;
4023 else if (ValDiff != SeqStepNum)
4024 return std::nullopt;
4025
4026 if (!SeqStepDenom)
4027 SeqStepDenom = IdxDiff;
4028 else if (IdxDiff != *SeqStepDenom)
4029 return std::nullopt;
4030 }
4031
4032 // Record this non-undef element for later.
4033 if (!PrevElt || PrevElt->first != *Elt)
4034 PrevElt = std::make_pair(*Elt, Idx);
4035 }
4036
4037 // We need to have logged a step for this to count as a legal index sequence.
4038 if (!SeqStepNum || !SeqStepDenom)
4039 return std::nullopt;
4040
4041 // Loop back through the sequence and validate elements we might have skipped
4042 // while waiting for a valid step. While doing this, log any sequence addend.
4043 for (auto [Idx, Elt] : enumerate(Elts)) {
4044 if (!Elt)
4045 continue;
4046 APInt ExpectedVal =
4047 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
4048 *SeqStepNum)
4049 .sdiv(*SeqStepDenom);
4050
4051 APInt Addend = *Elt - ExpectedVal;
4052 if (!SeqAddend)
4053 SeqAddend = Addend;
4054 else if (Addend != SeqAddend)
4055 return std::nullopt;
4056 }
4057
4058 assert(SeqAddend && "Must have an addend if we have a step");
4059
4060 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
4061 SeqAddend->getSExtValue()};
4062}
4063
4064// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
4065// and lower it as a VRGATHER_VX_VL from the source vector.
4066static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
4067 SelectionDAG &DAG,
4068 const RISCVSubtarget &Subtarget) {
4069 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4070 return SDValue();
4071 SDValue Src = SplatVal.getOperand(0);
4072 // Don't perform this optimization for i1 vectors, or if the element types are
4073 // different
4074 // FIXME: Support i1 vectors, maybe by promoting to i8?
4075 MVT EltTy = VT.getVectorElementType();
4076 if (EltTy == MVT::i1 ||
4077 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
4078 return SDValue();
4079 MVT SrcVT = Src.getSimpleValueType();
4080 if (EltTy != SrcVT.getVectorElementType())
4081 return SDValue();
4082 SDValue Idx = SplatVal.getOperand(1);
4083 // The index must be a legal type.
4084 if (Idx.getValueType() != Subtarget.getXLenVT())
4085 return SDValue();
4086
4087 // Check that we know Idx lies within VT
4088 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
4089 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
4090 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
4091 return SDValue();
4092 }
4093
4094 // Convert fixed length vectors to scalable
4095 MVT ContainerVT = VT;
4096 if (VT.isFixedLengthVector())
4097 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4098
4099 MVT SrcContainerVT = SrcVT;
4100 if (SrcVT.isFixedLengthVector()) {
4101 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4102 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4103 }
4104
4105 // Put Vec in a VT sized vector
4106 if (SrcContainerVT.getVectorMinNumElements() <
4107 ContainerVT.getVectorMinNumElements())
4108 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
4109 else
4110 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
4111
4112 // We checked that Idx fits inside VT earlier
4113 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4114 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
4115 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
4116 if (VT.isFixedLengthVector())
4117 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
4118 return Gather;
4119}
4120
4122 const RISCVSubtarget &Subtarget) {
4123 MVT VT = Op.getSimpleValueType();
4124 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4125
4126 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4127
4128 SDLoc DL(Op);
4129 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4130
4131 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
4132 int64_t StepNumerator = SimpleVID->StepNumerator;
4133 unsigned StepDenominator = SimpleVID->StepDenominator;
4134 int64_t Addend = SimpleVID->Addend;
4135
4136 assert(StepNumerator != 0 && "Invalid step");
4137 bool Negate = false;
4138 int64_t SplatStepVal = StepNumerator;
4139 unsigned StepOpcode = ISD::MUL;
4140 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
4141 // anyway as the shift of 63 won't fit in uimm5.
4142 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
4143 isPowerOf2_64(std::abs(StepNumerator))) {
4144 Negate = StepNumerator < 0;
4145 StepOpcode = ISD::SHL;
4146 SplatStepVal = Log2_64(std::abs(StepNumerator));
4147 }
4148
4149 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
4150 // since it's the immediate value many RVV instructions accept. There is
4151 // no vmul.vi instruction so ensure multiply constant can fit in a
4152 // single addi instruction. For the addend, we allow up to 32 bits..
4153 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
4154 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
4155 isPowerOf2_32(StepDenominator) &&
4156 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
4157 MVT VIDVT =
4159 MVT VIDContainerVT =
4160 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
4161 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
4162 // Convert right out of the scalable type so we can use standard ISD
4163 // nodes for the rest of the computation. If we used scalable types with
4164 // these, we'd lose the fixed-length vector info and generate worse
4165 // vsetvli code.
4166 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
4167 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
4168 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
4169 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
4170 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
4171 }
4172 if (StepDenominator != 1) {
4173 SDValue SplatStep =
4174 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
4175 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
4176 }
4177 if (Addend != 0 || Negate) {
4178 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
4179 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
4180 VID);
4181 }
4182 if (VT.isFloatingPoint()) {
4183 // TODO: Use vfwcvt to reduce register pressure.
4184 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
4185 }
4186 return VID;
4187 }
4188 }
4189
4190 return SDValue();
4191}
4192
4193/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
4194/// which constitute a large proportion of the elements. In such cases we can
4195/// splat a vector with the dominant element and make up the shortfall with
4196/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
4197/// Note that this includes vectors of 2 elements by association. The
4198/// upper-most element is the "dominant" one, allowing us to use a splat to
4199/// "insert" the upper element, and an insert of the lower element at position
4200/// 0, which improves codegen.
4202 const RISCVSubtarget &Subtarget) {
4203 MVT VT = Op.getSimpleValueType();
4204 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4205
4206 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4207
4208 SDLoc DL(Op);
4209 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4210
4211 MVT XLenVT = Subtarget.getXLenVT();
4212 unsigned NumElts = Op.getNumOperands();
4213
4214 SDValue DominantValue;
4215 unsigned MostCommonCount = 0;
4216 DenseMap<SDValue, unsigned> ValueCounts;
4217 unsigned NumUndefElts =
4218 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4219
4220 // Track the number of scalar loads we know we'd be inserting, estimated as
4221 // any non-zero floating-point constant. Other kinds of element are either
4222 // already in registers or are materialized on demand. The threshold at which
// a vector load is more desirable than several scalar materialization and
4224 // vector-insertion instructions is not known.
4225 unsigned NumScalarLoads = 0;
4226
4227 for (SDValue V : Op->op_values()) {
4228 if (V.isUndef())
4229 continue;
4230
4231 unsigned &Count = ValueCounts[V];
4232 if (0 == Count)
4233 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
4234 NumScalarLoads += !CFP->isExactlyValue(+0.0);
4235
4236 // Is this value dominant? In case of a tie, prefer the highest element as
4237 // it's cheaper to insert near the beginning of a vector than it is at the
4238 // end.
4239 if (++Count >= MostCommonCount) {
4240 DominantValue = V;
4241 MostCommonCount = Count;
4242 }
4243 }
4244
4245 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
4246 unsigned NumDefElts = NumElts - NumUndefElts;
4247 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
4248
4249 // Don't perform this optimization when optimizing for size, since
4250 // materializing elements and inserting them tends to cause code bloat.
4251 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
4252 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
4253 ((MostCommonCount > DominantValueCountThreshold) ||
4254 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
4255 // Start by splatting the most common element.
4256 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
4257
4258 DenseSet<SDValue> Processed{DominantValue};
4259
4260 // We can handle an insert into the last element (of a splat) via
4261 // v(f)slide1down. This is slightly better than the vslideup insert
4262 // lowering as it avoids the need for a vector group temporary. It
4263 // is also better than using vmerge.vx as it avoids the need to
4264 // materialize the mask in a vector register.
4265 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
4266 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
4267 LastOp != DominantValue) {
4268 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4269 auto OpCode =
4270 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4271 if (!VT.isFloatingPoint())
4272 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
4273 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4274 LastOp, Mask, VL);
4275 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
4276 Processed.insert(LastOp);
4277 }
4278
4279 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
4280 for (const auto &OpIdx : enumerate(Op->ops())) {
4281 const SDValue &V = OpIdx.value();
4282 if (V.isUndef() || !Processed.insert(V).second)
4283 continue;
4284 if (ValueCounts[V] == 1) {
4285 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
4286 } else {
4287 // Blend in all instances of this value using a VSELECT, using a
4288 // mask where each bit signals whether that element is the one
4289 // we're after.
4291 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
4292 return DAG.getConstant(V == V1, DL, XLenVT);
4293 });
4294 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
4295 DAG.getBuildVector(SelMaskTy, DL, Ops),
4296 DAG.getSplatBuildVector(VT, DL, V), Vec);
4297 }
4298 }
4299
4300 return Vec;
4301 }
4302
4303 return SDValue();
4304}
4305
4307 const RISCVSubtarget &Subtarget) {
4308 MVT VT = Op.getSimpleValueType();
4309 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4310
4311 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4312
4313 SDLoc DL(Op);
4314 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4315
4316 MVT XLenVT = Subtarget.getXLenVT();
4317 unsigned NumElts = Op.getNumOperands();
4318
4319 if (VT.getVectorElementType() == MVT::i1) {
4320 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4321 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4322 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4323 }
4324
4325 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4326 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4327 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4328 }
4329
4330 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4331 // scalar integer chunks whose bit-width depends on the number of mask
4332 // bits and XLEN.
4333 // First, determine the most appropriate scalar integer type to use. This
4334 // is at most XLenVT, but may be shrunk to a smaller vector element type
4335 // according to the size of the final vector - use i8 chunks rather than
4336 // XLenVT if we're producing a v8i1. This results in more consistent
4337 // codegen across RV32 and RV64.
4338 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4339 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4340 // If we have to use more than one INSERT_VECTOR_ELT then this
4341 // optimization is likely to increase code size; avoid performing it in
4342 // such a case. We can use a load from a constant pool in this case.
4343 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4344 return SDValue();
4345 // Now we can create our integer vector type. Note that it may be larger
4346 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4347 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4348 MVT IntegerViaVecVT =
4349 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4350 IntegerViaVecElts);
4351
4352 uint64_t Bits = 0;
4353 unsigned BitPos = 0, IntegerEltIdx = 0;
4354 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4355
4356 for (unsigned I = 0; I < NumElts;) {
4357 SDValue V = Op.getOperand(I);
4358 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4359 Bits |= ((uint64_t)BitValue << BitPos);
4360 ++BitPos;
4361 ++I;
4362
4363 // Once we accumulate enough bits to fill our scalar type or process the
4364 // last element, insert into our vector and clear our accumulated data.
4365 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4366 if (NumViaIntegerBits <= 32)
4367 Bits = SignExtend64<32>(Bits);
4368 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4369 Elts[IntegerEltIdx] = Elt;
4370 Bits = 0;
4371 BitPos = 0;
4372 IntegerEltIdx++;
4373 }
4374 }
4375
4376 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4377
4378 if (NumElts < NumViaIntegerBits) {
4379 // If we're producing a smaller vector than our minimum legal integer
4380 // type, bitcast to the equivalent (known-legal) mask type, and extract
4381 // our final mask.
4382 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4383 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4384 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4385 } else {
4386 // Else we must have produced an integer type with the same size as the
4387 // mask type; bitcast for the final result.
4388 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4389 Vec = DAG.getBitcast(VT, Vec);
4390 }
4391
4392 return Vec;
4393 }
4394
4396 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4397 : RISCVISD::VMV_V_X_VL;
4398 if (!VT.isFloatingPoint())
4399 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4400 Splat =
4401 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4402 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4403 }
4404
4405 // Try and match index sequences, which we can lower to the vid instruction
4406 // with optional modifications. An all-undef vector is matched by
4407 // getSplatValue, above.
4408 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4409 return Res;
4410
4411 // For very small build_vectors, use a single scalar insert of a constant.
4412 // TODO: Base this on constant rematerialization cost, not size.
4413 const unsigned EltBitSize = VT.getScalarSizeInBits();
4414 if (VT.getSizeInBits() <= 32 &&
4416 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4417 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4418 "Unexpected sequence type");
4419 // If we can use the original VL with the modified element type, this
4420 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4421 // be moved into InsertVSETVLI?
4422 unsigned ViaVecLen =
4423 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4424 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4425
4426 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4427 uint64_t SplatValue = 0;
4428 // Construct the amalgamated value at this larger vector type.
4429 for (const auto &OpIdx : enumerate(Op->op_values())) {
4430 const auto &SeqV = OpIdx.value();
4431 if (!SeqV.isUndef())
4432 SplatValue |=
4433 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4434 }
4435
4436 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4437 // achieve better constant materializion.
4438 // On RV32, we need to sign-extend to use getSignedConstant.
4439 if (ViaIntVT == MVT::i32)
4440 SplatValue = SignExtend64<32>(SplatValue);
4441
4442 SDValue Vec = DAG.getInsertVectorElt(
4443 DL, DAG.getUNDEF(ViaVecVT),
4444 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4445 if (ViaVecLen != 1)
4446 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4447 return DAG.getBitcast(VT, Vec);
4448 }
4449
4450
4451 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4452 // when re-interpreted as a vector with a larger element type. For example,
4453 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4454 // could be instead splat as
4455 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4456 // TODO: This optimization could also work on non-constant splats, but it
4457 // would require bit-manipulation instructions to construct the splat value.
4458 SmallVector<SDValue> Sequence;
4459 const auto *BV = cast<BuildVectorSDNode>(Op);
4460 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4462 BV->getRepeatedSequence(Sequence) &&
4463 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4464 unsigned SeqLen = Sequence.size();
4465 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4466 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4467 ViaIntVT == MVT::i64) &&
4468 "Unexpected sequence type");
4469
4470 // If we can use the original VL with the modified element type, this
4471 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4472 // be moved into InsertVSETVLI?
4473 const unsigned RequiredVL = NumElts / SeqLen;
4474 const unsigned ViaVecLen =
4475 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4476 NumElts : RequiredVL;
4477 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4478
4479 unsigned EltIdx = 0;
4480 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4481 uint64_t SplatValue = 0;
4482 // Construct the amalgamated value which can be splatted as this larger
4483 // vector type.
4484 for (const auto &SeqV : Sequence) {
4485 if (!SeqV.isUndef())
4486 SplatValue |=
4487 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4488 EltIdx++;
4489 }
4490
4491 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4492 // achieve better constant materializion.
4493 // On RV32, we need to sign-extend to use getSignedConstant.
4494 if (ViaIntVT == MVT::i32)
4495 SplatValue = SignExtend64<32>(SplatValue);
4496
4497 // Since we can't introduce illegal i64 types at this stage, we can only
4498 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4499 // way we can use RVV instructions to splat.
4500 assert((ViaIntVT.bitsLE(XLenVT) ||
4501 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4502 "Unexpected bitcast sequence");
4503 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4504 SDValue ViaVL =
4505 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4506 MVT ViaContainerVT =
4507 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4508 SDValue Splat =
4509 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4510 DAG.getUNDEF(ViaContainerVT),
4511 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4512 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4513 if (ViaVecLen != RequiredVL)
4515 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4516 return DAG.getBitcast(VT, Splat);
4517 }
4518 }
4519
4520 // If the number of signbits allows, see if we can lower as a <N x i8>.
4521 // Our main goal here is to reduce LMUL (and thus work) required to
4522 // build the constant, but we will also narrow if the resulting
4523 // narrow vector is known to materialize cheaply.
4524 // TODO: We really should be costing the smaller vector. There are
4525 // profitable cases this misses.
4526 if (EltBitSize > 8 && VT.isInteger() &&
4527 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4528 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4529 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4530 DL, Op->ops());
4531 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4532 Source, DAG, Subtarget);
4533 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4534 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4535 }
4536
4537 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4538 return Res;
4539
4540 // For constant vectors, use generic constant pool lowering. Otherwise,
4541 // we'd have to materialize constants in GPRs just to move them into the
4542 // vector.
4543 return SDValue();
4544}
4545
4546static unsigned getPACKOpcode(unsigned DestBW,
4547 const RISCVSubtarget &Subtarget) {
4548 switch (DestBW) {
4549 default:
4550 llvm_unreachable("Unsupported pack size");
4551 case 16:
4552 return RISCV::PACKH;
4553 case 32:
4554 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4555 case 64:
4556 assert(Subtarget.is64Bit());
4557 return RISCV::PACK;
4558 }
4559}
4560
4561/// Double the element size of the build vector to reduce the number
4562/// of vslide1down in the build vector chain. In the worst case, this
4563/// trades three scalar operations for 1 vector operation. Scalar
4564/// operations are generally lower latency, and for out-of-order cores
4565/// we also benefit from additional parallelism.
4567 const RISCVSubtarget &Subtarget) {
4568 SDLoc DL(Op);
4569 MVT VT = Op.getSimpleValueType();
4570 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4571 MVT ElemVT = VT.getVectorElementType();
4572 if (!ElemVT.isInteger())
4573 return SDValue();
4574
4575 // TODO: Relax these architectural restrictions, possibly with costing
4576 // of the actual instructions required.
4577 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4578 return SDValue();
4579
4580 unsigned NumElts = VT.getVectorNumElements();
4581 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4582 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4583 NumElts % 2 != 0)
4584 return SDValue();
4585
4586 // Produce [B,A] packed into a type twice as wide. Note that all
4587 // scalars are XLenVT, possibly masked (see below).
4588 MVT XLenVT = Subtarget.getXLenVT();
4589 SDValue Mask = DAG.getConstant(
4590 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4591 auto pack = [&](SDValue A, SDValue B) {
4592 // Bias the scheduling of the inserted operations to near the
4593 // definition of the element - this tends to reduce register
4594 // pressure overall.
4595 SDLoc ElemDL(B);
4596 if (Subtarget.hasStdExtZbkb())
4597 // Note that we're relying on the high bits of the result being
4598 // don't care. For PACKW, the result is *sign* extended.
4599 return SDValue(
4600 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4601 ElemDL, XLenVT, A, B),
4602 0);
4603
4604 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4605 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4606 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4607 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4608 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4610 };
4611
4612 SmallVector<SDValue> NewOperands;
4613 NewOperands.reserve(NumElts / 2);
4614 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4615 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4616 assert(NumElts == NewOperands.size() * 2);
4617 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4618 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4619 return DAG.getNode(ISD::BITCAST, DL, VT,
4620 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4621}
4622
4624 const RISCVSubtarget &Subtarget) {
4625 MVT VT = Op.getSimpleValueType();
4626 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4627
4628 MVT EltVT = VT.getVectorElementType();
4629 MVT XLenVT = Subtarget.getXLenVT();
4630
4631 SDLoc DL(Op);
4632
4633 if (Subtarget.isRV32() && Subtarget.hasStdExtP()) {
4634 if (VT != MVT::v4i8)
4635 return SDValue();
4636
4637 // <4 x i8> BUILD_VECTOR a, b, c, d -> PACK(PPACK.DH pair(a, c), pair(b, d))
4638 SDValue Val0 =
4639 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(0));
4640 SDValue Val1 =
4641 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(1));
4642 SDValue Val2 =
4643 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(2));
4644 SDValue Val3 =
4645 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(3));
4646 SDValue PPairDB =
4647 DAG.getNode(RISCVISD::PPAIRE_DB, DL, {MVT::v4i8, MVT::v4i8},
4648 {Val0, Val2, Val1, Val3});
4649
4650 return DAG.getNode(
4651 ISD::BITCAST, DL, MVT::v4i8,
4652 SDValue(
4653 DAG.getMachineNode(
4654 RISCV::PACK, DL, MVT::i32,
4655 {DAG.getNode(ISD::BITCAST, DL, MVT::i32, PPairDB.getValue(0)),
4656 DAG.getNode(ISD::BITCAST, DL, MVT::i32, PPairDB.getValue(1))}),
4657 0));
4658 }
4659
4660 // Proper support for f16 requires Zvfh. bf16 always requires special
4661 // handling. We need to cast the scalar to integer and create an integer
4662 // build_vector.
4663 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4664 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
4665 MVT IVT = VT.changeVectorElementType(MVT::i16);
4666 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4667 for (const auto &[I, U] : enumerate(Op->ops())) {
4668 SDValue Elem = U.get();
4669 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4670 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4671 // Called by LegalizeDAG, we need to use XLenVT operations since we
4672 // can't create illegal types.
4673 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4674 // Manually constant fold so the integer build_vector can be lowered
4675 // better. Waiting for DAGCombine will be too late.
4676 APInt V =
4677 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4678 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4679 } else {
4680 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4681 }
4682 } else {
4683 // Called by scalar type legalizer, we can use i16.
4684 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4685 }
4686 }
4687 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4688 return DAG.getBitcast(VT, Res);
4689 }
4690
4691 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4693 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4694
4695 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4696
4697 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4698
4699 if (VT.getVectorElementType() == MVT::i1) {
4700 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4701 // vector type, we have a legal equivalently-sized i8 type, so we can use
4702 // that.
4703 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4704 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4705
4706 SDValue WideVec;
4708 // For a splat, perform a scalar truncate before creating the wider
4709 // vector.
4710 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4711 DAG.getConstant(1, DL, Splat.getValueType()));
4712 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4713 } else {
4714 SmallVector<SDValue, 8> Ops(Op->op_values());
4715 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4716 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4717 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4718 }
4719
4720 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4721 }
4722
4724 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4725 return Gather;
4726
4727 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4728 // pressure at high LMUL.
4729 if (all_of(Op->ops().drop_front(),
4730 [](const SDUse &U) { return U.get().isUndef(); })) {
4731 unsigned Opc =
4732 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4733 if (!VT.isFloatingPoint())
4734 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4735 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4736 Splat, VL);
4737 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4738 }
4739
4740 unsigned Opc =
4741 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4742 if (!VT.isFloatingPoint())
4743 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4744 Splat =
4745 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4746 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4747 }
4748
4749 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4750 return Res;
4751
4752 // If we're compiling for an exact VLEN value, we can split our work per
4753 // register in the register group.
4754 if (const auto VLen = Subtarget.getRealVLen();
4755 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4756 MVT ElemVT = VT.getVectorElementType();
4757 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4758 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4759 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4760 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4761 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4762
4763 // The following semantically builds up a fixed length concat_vector
4764 // of the component build_vectors. We eagerly lower to scalable and
4765 // insert_subvector here to avoid DAG combining it back to a large
4766 // build_vector.
4767 SmallVector<SDValue> BuildVectorOps(Op->ops());
4768 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4769 SDValue Vec = DAG.getUNDEF(ContainerVT);
4770 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4771 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4772 SDValue SubBV =
4773 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4774 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4775 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4776 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4777 }
4778 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4779 }
4780
4781 // If we're about to resort to vslide1down (or stack usage), pack our
4782 // elements into the widest scalar type we can. This will force a VL/VTYPE
4783 // toggle, but reduces the critical path, the number of vslide1down ops
4784 // required, and possibly enables scalar folds of the values.
4785 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4786 return Res;
4787
4788 // For m1 vectors, if we have non-undef values in both halves of our vector,
4789 // split the vector into low and high halves, build them separately, then
4790 // use a vselect to combine them. For long vectors, this cuts the critical
4791 // path of the vslide1down sequence in half, and gives us an opportunity
4792 // to special case each half independently. Note that we don't change the
4793 // length of the sub-vectors here, so if both fallback to the generic
4794 // vslide1down path, we should be able to fold the vselect into the final
4795 // vslidedown (for the undef tail) for the first half w/ masking.
4796 unsigned NumElts = VT.getVectorNumElements();
4797 unsigned NumUndefElts =
4798 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4799 unsigned NumDefElts = NumElts - NumUndefElts;
4800 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4801 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4802 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4803 SmallVector<SDValue> MaskVals;
4804 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4805 SubVecAOps.reserve(NumElts);
4806 SubVecBOps.reserve(NumElts);
4807 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4808 SDValue Elem = U.get();
4809 if (Idx < NumElts / 2) {
4810 SubVecAOps.push_back(Elem);
4811 SubVecBOps.push_back(UndefElem);
4812 } else {
4813 SubVecAOps.push_back(UndefElem);
4814 SubVecBOps.push_back(Elem);
4815 }
4816 bool SelectMaskVal = (Idx < NumElts / 2);
4817 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4818 }
4819 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4820 MaskVals.size() == NumElts);
4821
4822 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4823 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4824 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4825 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4826 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4827 }
4828
4829 // Cap the cost at a value linear to the number of elements in the vector.
4830 // The default lowering is to use the stack. The vector store + scalar loads
4831 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4832 // being (at least) linear in LMUL. As a result, using the vslidedown
4833 // lowering for every element ends up being VL*LMUL..
4834 // TODO: Should we be directly costing the stack alternative? Doing so might
4835 // give us a more accurate upper bound.
4836 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4837
4838 // TODO: unify with TTI getSlideCost.
4839 InstructionCost PerSlideCost = 1;
4840 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4841 default: break;
4842 case RISCVVType::LMUL_2:
4843 PerSlideCost = 2;
4844 break;
4845 case RISCVVType::LMUL_4:
4846 PerSlideCost = 4;
4847 break;
4848 case RISCVVType::LMUL_8:
4849 PerSlideCost = 8;
4850 break;
4851 }
4852
4853 // TODO: Should we be using the build instseq then cost + evaluate scheme
4854 // we use for integer constants here?
4855 unsigned UndefCount = 0;
4856 for (const SDValue &V : Op->ops()) {
4857 if (V.isUndef()) {
4858 UndefCount++;
4859 continue;
4860 }
4861 if (UndefCount) {
4862 LinearBudget -= PerSlideCost;
4863 UndefCount = 0;
4864 }
4865 LinearBudget -= PerSlideCost;
4866 }
4867 if (UndefCount) {
4868 LinearBudget -= PerSlideCost;
4869 }
4870
4871 if (LinearBudget < 0)
4872 return SDValue();
4873
4874 assert((!VT.isFloatingPoint() ||
4875 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4876 "Illegal type which will result in reserved encoding");
4877
4878 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4879
4880 // General case: splat the first operand and slide other operands down one
4881 // by one to form a vector. Alternatively, if every operand is an
4882 // extraction from element 0 of a vector, we use that vector from the last
4883 // extraction as the start value and slide up instead of slide down. Such that
4884 // (1) we can avoid the initial splat (2) we can turn those vslide1up into
4885 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4886 // something we cannot do with vslide1down/vslidedown.
4887 // Of course, using vslide1up/vslideup might increase the register pressure,
4888 // and that's why we conservatively limit to cases where every operand is an
4889 // extraction from the first element.
4890 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4891 SDValue EVec;
4892 bool SlideUp = false;
4893 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4894 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4895 if (SlideUp)
4896 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4897 Mask, VL, Policy);
4898 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4899 Mask, VL, Policy);
4900 };
4901
4902 // The reason we don't use all_of here is because we're also capturing EVec
4903 // from the last non-undef operand. If the std::execution_policy of the
4904 // underlying std::all_of is anything but std::sequenced_policy we might
4905 // capture the wrong EVec.
4906 for (SDValue V : Operands) {
4907 using namespace SDPatternMatch;
4908 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4909 if (!SlideUp)
4910 break;
4911 }
4912
4913 // Do not slideup if the element type of EVec is different.
4914 if (SlideUp) {
4915 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4916 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4917 if (EVecEltVT != ContainerEltVT)
4918 SlideUp = false;
4919 }
4920
4921 if (SlideUp) {
4922 MVT EVecContainerVT = EVec.getSimpleValueType();
4923 // Make sure the original vector has scalable vector type.
4924 if (EVecContainerVT.isFixedLengthVector()) {
4925 EVecContainerVT =
4926 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4927 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4928 }
4929
4930 // Adapt EVec's type into ContainerVT.
4931 if (EVecContainerVT.getVectorMinNumElements() <
4932 ContainerVT.getVectorMinNumElements())
4933 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4934 else
4935 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4936
4937 // Reverse the elements as we're going to slide up from the last element.
4938 std::reverse(Operands.begin(), Operands.end());
4939 }
4940
4941 SDValue Vec;
4942 UndefCount = 0;
4943 for (SDValue V : Operands) {
4944 if (V.isUndef()) {
4945 UndefCount++;
4946 continue;
4947 }
4948
4949 // Start our sequence with either a TA splat or extract source in the
4950 // hopes that hardware is able to recognize there's no dependency on the
4951 // prior value of our temporary register.
4952 if (!Vec) {
4953 if (SlideUp) {
4954 Vec = EVec;
4955 } else {
4956 Vec = DAG.getSplatVector(VT, DL, V);
4957 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4958 }
4959
4960 UndefCount = 0;
4961 continue;
4962 }
4963
4964 if (UndefCount) {
4965 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4966 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4967 VL);
4968 UndefCount = 0;
4969 }
4970
4971 unsigned Opcode;
4972 if (VT.isFloatingPoint())
4973 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4974 else
4975 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4976
4977 if (!VT.isFloatingPoint())
4978 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4979 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4980 V, Mask, VL);
4981 }
4982 if (UndefCount) {
4983 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4984 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4985 VL);
4986 }
4987 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4988}
4989
4990static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4992 SelectionDAG &DAG) {
4993 if (!Passthru)
4994 Passthru = DAG.getUNDEF(VT);
4996 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4997 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4998 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4999 // node in order to try and match RVV vector/scalar instructions.
5000 if ((LoC >> 31) == HiC)
5001 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5002
5003 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
5004 // VL. This can temporarily increase VL if VL less than VLMAX.
5005 if (LoC == HiC) {
5006 SDValue NewVL;
5007 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
5008 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
5009 else
5010 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
5011 MVT InterVT =
5012 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
5013 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
5014 DAG.getUNDEF(InterVT), Lo, NewVL);
5015 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
5016 }
5017 }
5018
5019 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
5020 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
5021 isa<ConstantSDNode>(Hi.getOperand(1)) &&
5022 Hi.getConstantOperandVal(1) == 31)
5023 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5024
5025 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
5026 // even if it might be sign extended.
5027 if (Hi.isUndef())
5028 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
5029
5030 // Fall back to a stack store and stride x0 vector load.
5031 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
5032 Hi, VL);
5033}
5034
5035// Called by type legalization to handle splat of i64 on RV32.
5036// FIXME: We can optimize this when the type has sign or zero bits in one
5037// of the halves.
5038static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
5039 SDValue Scalar, SDValue VL,
5040 SelectionDAG &DAG) {
5041 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
5042 SDValue Lo, Hi;
5043 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
5044 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
5045}
5046
5047 // This function lowers a splat of a scalar operand Splat with the vector
5048 // length VL. It ensures the final sequence is type legal, which is useful when
5049 // lowering a splat after type legalization.
5050 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
5051 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
5052 const RISCVSubtarget &Subtarget) {
// A null Passthru is normalized to undef so downstream node creation always
// has a passthru operand.
5053 bool HasPassthru = Passthru && !Passthru.isUndef();
5054 if (!HasPassthru && !Passthru)
5055 Passthru = DAG.getUNDEF(VT);
5056
5057 MVT EltVT = VT.getVectorElementType();
5058 MVT XLenVT = Subtarget.getXLenVT();
5059
5060 if (VT.isFloatingPoint()) {
// f16/bf16 splats without the corresponding vector FP extension are done as
// i16 integer splats of the scalar's bit pattern, then bitcast back to VT.
5061 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
5062 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
// With scalar Zfhmin/Zfbfmin the bits can be moved with fmv.x.h; otherwise
// fall back to a plain bitcast of the scalar to i16.
5063 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
5064 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
5065 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
5066 else
5067 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
5068 MVT IVT = VT.changeVectorElementType(MVT::i16);
5069 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
5070 SDValue Splat =
5071 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
5072 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
5073 }
5074 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
5075 }
5076
5077 // Simplest case is that the operand needs to be promoted to XLenVT.
5078 if (Scalar.getValueType().bitsLE(XLenVT)) {
5079 // If the operand is a constant, sign extend to increase our chances
5080 // of being able to use a .vi instruction. ANY_EXTEND would become a
5081 // a zero extend and the simm5 check in isel would fail.
5082 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the initializer of ExtOpc is not visible in this copy of the
// file; it presumably selects SIGN_EXTEND for constants — confirm upstream.
5083 unsigned ExtOpc =
5085 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
5086 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
5087 }
5088
// Past this point we are splatting an i64 scalar on an RV32 target.
5089 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
5090 "Unexpected scalar for splat lowering!");
5091
// Splat-of-zero with VL=1 can use a single vmv.s.x of zero.
5092 if (isOneConstant(VL) && isNullConstant(Scalar))
5093 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
5094 DAG.getConstant(0, DL, XLenVT), VL);
5095
5096 // Otherwise use the more complicated splatting algorithm.
5097 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
5098 }
5099
5100 // This function lowers an insert of a scalar operand Scalar into lane
5101 // 0 of the vector regardless of the value of VL. The contents of the
5102 // remaining lanes of the result vector are unspecified. VL is assumed
5103 // to be non-zero.
// NOTE(review): the first line of this function's signature is missing from
// this copy of the file (the function takes Scalar, VL and VT in addition to
// the parameters visible below) — confirm against upstream.
5105 const SDLoc &DL, SelectionDAG &DAG,
5106 const RISCVSubtarget &Subtarget) {
5107 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
5108
5109 const MVT XLenVT = Subtarget.getXLenVT();
5110 SDValue Passthru = DAG.getUNDEF(VT);
5111
// If the scalar itself was just extracted from lane 0 of a vector with the
// same element type, reuse that vector directly via subvector insert/extract
// instead of materializing a scalar move.
5112 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5113 isNullConstant(Scalar.getOperand(1))) {
5114 SDValue ExtractedVal = Scalar.getOperand(0);
5115 // The element types must be the same.
5116 if (ExtractedVal.getValueType().getVectorElementType() ==
5117 VT.getVectorElementType()) {
5118 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
5119 MVT ExtractedContainerVT = ExtractedVT;
5120 if (ExtractedContainerVT.isFixedLengthVector()) {
5121 ExtractedContainerVT = getContainerForFixedLengthVector(
5122 DAG, ExtractedContainerVT, Subtarget);
5123 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
5124 ExtractedVal, DAG, Subtarget);
5125 }
// Insert when the source fits inside VT, otherwise extract the VT-sized
// prefix of the (larger) source.
5126 if (ExtractedContainerVT.bitsLE(VT))
5127 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
5128 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
5129 }
5130 }
5131
5132 if (VT.isFloatingPoint())
5133 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5134 VL);
5135
5136 // Avoid the tricky legalization cases by falling back to using the
5137 // splat code which already handles it gracefully.
5138 if (!Scalar.getValueType().bitsLE(XLenVT))
5139 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
5140 DAG.getConstant(1, DL, XLenVT),
5141 VT, DL, DAG, Subtarget);
5142
5143 // If the operand is a constant, sign extend to increase our chances
5144 // of being able to use a .vi instruction. ANY_EXTEND would become a
5145 // a zero extend and the simm5 check in isel would fail.
5146 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the initializer of ExtOpc is not visible in this copy of the
// file; it presumably selects SIGN_EXTEND for constants — confirm upstream.
5147 unsigned ExtOpc =
5149 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
5150 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5151 VL);
5152 }
5153
5154 /// If concat_vector(V1,V2) could be folded away to some existing
5155 /// vector source, return it. Note that the source may be larger
5156 /// than the requested concat_vector (i.e. a extract_subvector
5157 /// might be required.)
// NOTE(review): the signature line (taking V1 and V2) is missing from this
// copy of the file — confirm against upstream.
5159 EVT VT = V1.getValueType();
5160 assert(VT == V2.getValueType() && "argument types must match");
5161 // Both input must be extracts.
// NOTE(review): the condition's second line (the check on V2's opcode) is
// missing from this copy of the file.
5162 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
5164 return SDValue();
5165
5166 // Extracting from the same source.
// Mixing a scalable source with a fixed-length concat (or vice versa) is
// rejected since the halves would not line up.
5167 SDValue Src = V1.getOperand(0);
5168 if (Src != V2.getOperand(0) ||
5169 VT.isScalableVector() != Src.getValueType().isScalableVector())
5170 return SDValue();
5171
5172 // The extracts must extract the two halves of the source.
// NOTE(review): the condition's second line (the check on V2's extract
// index) is missing from this copy of the file.
5173 if (V1.getConstantOperandVal(1) != 0 ||
5175 return SDValue();
5176
5177 return Src;
5178 }
5179
5180 // Can this shuffle be performed on exactly one (possibly larger) input?
// NOTE(review): the signature line of this helper is missing from this copy
// of the file (it takes VT, V1 and V2) — confirm against upstream.
5182
// A shuffle with an undef second operand trivially has a single source.
5183 if (V2.isUndef())
5184 return V1;
5185
5186 unsigned NumElts = VT.getVectorNumElements();
5187 // Src needs to have twice the number of elements.
5188 // TODO: Update shuffle lowering to add the extract subvector
// If V1/V2 are the two halves of one existing vector, that vector is the
// single (double-width) source.
5189 if (SDValue Src = foldConcatVector(V1, V2);
5190 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
5191 return Src;
5192
5193 return SDValue();
5194 }
5195
5196/// Is this shuffle interleaving contiguous elements from one vector into the
5197/// even elements and contiguous elements from another vector into the odd
5198/// elements. \p EvenSrc will contain the element that should be in the first
5199/// even element. \p OddSrc will contain the element that should be in the first
5200/// odd element. These can be the first element in a source or the element half
5201/// way through the source.
5202static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
5203 int &OddSrc, const RISCVSubtarget &Subtarget) {
5204 // We need to be able to widen elements to the next larger integer type or
5205 // use the zip2a instruction at e64.
5206 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
5207 !Subtarget.hasVendorXRivosVizip())
5208 return false;
5209
5210 int Size = Mask.size();
5211 int NumElts = VT.getVectorNumElements();
5212 assert(Size == (int)NumElts && "Unexpected mask size");
5213
5214 SmallVector<unsigned, 2> StartIndexes;
5215 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
5216 return false;
5217
5218 EvenSrc = StartIndexes[0];
5219 OddSrc = StartIndexes[1];
5220
5221 // One source should be low half of first vector.
5222 if (EvenSrc != 0 && OddSrc != 0)
5223 return false;
5224
5225 // Subvectors will be subtracted from either at the start of the two input
5226 // vectors, or at the start and middle of the first vector if it's an unary
5227 // interleave.
5228 // In both cases, HalfNumElts will be extracted.
5229 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
5230 // we'll create an illegal extract_subvector.
5231 // FIXME: We could support other values using a slidedown first.
5232 int HalfNumElts = NumElts / 2;
5233 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
5234}
5235
5236 /// Is this mask representing a masked combination of two slides?
// NOTE(review): the first line of the signature (taking the Mask) is missing
// from this copy of the file — confirm against upstream.
5238 std::array<std::pair<int, int>, 2> &SrcInfo) {
// Delegate the raw structural match to the shared utility; SrcInfo receives
// (source, offset) pairs for the two slides on success.
5239 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
5240 return false;
5241
5242 // Avoid matching vselect idioms
5243 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
5244 return false;
5245 // Prefer vslideup as the second instruction, and identity
5246 // only as the initial instruction.
5247 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
5248 SrcInfo[1].second == 0)
5249 std::swap(SrcInfo[0], SrcInfo[1]);
5250 assert(SrcInfo[0].first != -1 && "Must find one slide");
5251 return true;
5252 }
5253
5254// Exactly matches the semantics of a previously existing custom matcher
5255// to allow migration to new matcher without changing output.
5256static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
5257 unsigned NumElts) {
5258 if (SrcInfo[1].first == -1)
5259 return true;
5260 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
5261 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
5262}
5263
5264static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
5265 ArrayRef<int> Mask, unsigned Factor,
5266 bool RequiredPolarity) {
5267 int NumElts = Mask.size();
5268 for (const auto &[Idx, M] : enumerate(Mask)) {
5269 if (M < 0)
5270 continue;
5271 int Src = M >= NumElts;
5272 int Diff = (int)Idx - (M % NumElts);
5273 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
5274 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
5275 "Must match exactly one of the two slides");
5276 if (RequiredPolarity != (C == (Idx / Factor) % 2))
5277 return false;
5278 }
5279 return true;
5280}
5281
5282/// Given a shuffle which can be represented as a pair of two slides,
5283/// see if it is a zipeven idiom. Zipeven is:
5284/// vs2: a0 a1 a2 a3
5285/// vs1: b0 b1 b2 b3
5286/// vd: a0 b0 a2 b2
5287static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
5288 ArrayRef<int> Mask, unsigned &Factor) {
5289 Factor = SrcInfo[1].second;
5290 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5291 Mask.size() % Factor == 0 &&
5292 isAlternating(SrcInfo, Mask, Factor, true);
5293}
5294
5295/// Given a shuffle which can be represented as a pair of two slides,
5296/// see if it is a zipodd idiom. Zipodd is:
5297/// vs2: a0 a1 a2 a3
5298/// vs1: b0 b1 b2 b3
5299/// vd: a1 b1 a3 b3
5300/// Note that the operand order is swapped due to the way we canonicalize
5301/// the slides, so SrCInfo[0] is vs1, and SrcInfo[1] is vs2.
5302static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
5303 ArrayRef<int> Mask, unsigned &Factor) {
5304 Factor = -SrcInfo[1].second;
5305 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5306 Mask.size() % Factor == 0 &&
5307 isAlternating(SrcInfo, Mask, Factor, false);
5308}
5309
5310 // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
5311 // 2, 4, 8 and the integer type Factor-times larger than VT's
5312 // element type must be a legal element type.
5313 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
5314 // -> [p, q, r, s] (Factor=2, Index=1)
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5316 SDValue Src, unsigned Factor,
5317 unsigned Index, SelectionDAG &DAG) {
// View the source as a vector of Factor-times-wider integer elements; each
// wide element then holds one whole group of Factor original elements.
5318 unsigned EltBits = VT.getScalarSizeInBits();
5319 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
5320 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
5321 SrcEC.divideCoefficientBy(Factor));
5322 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
5323 SrcEC.divideCoefficientBy(Factor));
5324 Src = DAG.getBitcast(WideSrcVT, Src);
5325
// Shift the requested lane of each group down to bit 0, then truncate the
// wide elements back to the original element width.
5326 unsigned Shift = Index * EltBits;
5327 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
5328 DAG.getConstant(Shift, DL, WideSrcVT));
5329 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
// NOTE(review): the declaration of CastVT is missing from this copy of the
// file — confirm its definition against upstream.
5331 Res = DAG.getBitcast(CastVT, Res);
5332 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
5333 }
5334
5335 /// Match a single source shuffle which is an identity except that some
5336 /// particular element is repeated. This can be lowered as a masked
5337 /// vrgather.vi/vx. Note that the two source form of this is handled
5338 /// by the recursive splitting logic and doesn't need special handling.
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5340 const RISCVSubtarget &Subtarget,
5341 SelectionDAG &DAG) {
5342
5343 SDLoc DL(SVN);
5344 MVT VT = SVN->getSimpleValueType(0);
5345 SDValue V1 = SVN->getOperand(0);
5346 assert(SVN->getOperand(1).isUndef());
5347 ArrayRef<int> Mask = SVN->getMask();
5348 const unsigned NumElts = VT.getVectorNumElements();
5349 MVT XLenVT = Subtarget.getXLenVT();
5350
// Every non-identity, non-undef lane must read the same source element;
// remember that element as the splat index.
5351 std::optional<int> SplatIdx;
5352 for (auto [I, M] : enumerate(Mask)) {
5353 if (M == -1 || I == (unsigned)M)
5354 continue;
5355 if (SplatIdx && *SplatIdx != M)
5356 return SDValue();
5357 SplatIdx = M;
5358 }
5359
// A pure identity (no repeated element at all) is not this pattern.
5360 if (!SplatIdx)
5361 return SDValue();
5362
// Build a select mask that is true exactly where the splatted element
// belongs, then vselect between a full splat shuffle and the original.
5363 SmallVector<SDValue> MaskVals;
5364 for (int MaskIndex : Mask) {
5365 bool SelectMaskVal = MaskIndex == *SplatIdx;
5366 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5367 }
5368 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5369 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5370 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5371 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5372 SmallVector<int>(NumElts, *SplatIdx));
5373 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5374 }
5375
5376 // Lower the following shuffle to vslidedown.
5377 // a)
5378 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
5379 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
5380 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5381 // b)
5382 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
5383 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
5384 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
5385 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
5386 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
5387 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5389 SDValue V1, SDValue V2,
5390 ArrayRef<int> Mask,
5391 const RISCVSubtarget &Subtarget,
5392 SelectionDAG &DAG) {
// Walk up a chain of fixed-length EXTRACT_SUBVECTORs, accumulating the
// total element offset into the ultimate (non-extract) parent.
5393 auto findNonEXTRACT_SUBVECTORParent =
5394 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5395 uint64_t Offset = 0;
5396 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5397 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5398 // a scalable vector. But we don't want to match the case.
5399 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5400 Offset += Parent.getConstantOperandVal(1);
5401 Parent = Parent.getOperand(0);
5402 }
5403 return std::make_pair(Parent, Offset);
5404 };
5405
5406 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5407 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5408
5409 // Extracting from the same source.
5410 SDValue Src = V1Src;
5411 if (Src != V2Src)
5412 return SDValue();
5413
5414 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5415 SmallVector<int, 16> NewMask(Mask);
5416 for (size_t i = 0; i != NewMask.size(); ++i) {
5417 if (NewMask[i] == -1)
5418 continue;
5419
5420 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5421 NewMask[i] = NewMask[i] + V1IndexOffset;
5422 } else {
5423 // Minus NewMask.size() is needed. Otherwise, the b case would be
5424 // <5,6,7,12> instead of <5,6,7,8>.
5425 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5426 }
5427 }
5428
5429 // First index must be known and non-zero. It will be used as the slidedown
5430 // amount.
5431 if (NewMask[0] <= 0)
5432 return SDValue();
5433
5434 // NewMask is also continuous.
5435 for (unsigned i = 1; i != NewMask.size(); ++i)
5436 if (NewMask[i - 1] + 1 != NewMask[i])
5437 return SDValue();
5438
// Perform the slidedown on the full source, then extract the VT-sized
// prefix of the result.
5439 MVT XLenVT = Subtarget.getXLenVT();
5440 MVT SrcVT = Src.getSimpleValueType();
5441 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5442 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5443 SDValue Slidedown =
5444 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5445 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5446 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5447 return DAG.getExtractSubvector(
5448 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5449 }
5450
5451 // Because vslideup leaves the destination elements at the start intact, we can
5452 // use it to perform shuffles that insert subvectors:
5453 //
5454 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5455 // ->
5456 // vsetvli zero, 8, e8, mf2, ta, ma
5457 // vslideup.vi v8, v9, 4
5458 //
5459 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5460 // ->
5461 // vsetvli zero, 5, e8, mf2, tu, ma
5462 // vslideup.v1 v8, v9, 2
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5464 SDValue V1, SDValue V2,
5465 ArrayRef<int> Mask,
5466 const RISCVSubtarget &Subtarget,
5467 SelectionDAG &DAG) {
5468 unsigned NumElts = VT.getVectorNumElements();
5469 int NumSubElts, Index;
5470 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5471 Index))
5472 return SDValue();
5473
// Determine which operand is the vector being inserted into (in place) and
// which supplies the inserted subvector.
5474 bool OpsSwapped = Mask[Index] < (int)NumElts;
5475 SDValue InPlace = OpsSwapped ? V2 : V1;
5476 SDValue ToInsert = OpsSwapped ? V1 : V2;
5477
5478 MVT XLenVT = Subtarget.getXLenVT();
5479 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5480 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5481 // We slide up by the index that the subvector is being inserted at, and set
5482 // VL to the index + the number of elements being inserted.
// NOTE(review): the initializer of Policy is missing from this copy of the
// file — confirm its base value against upstream.
5483 unsigned Policy =
5485 // If the we're adding a suffix to the in place vector, i.e. inserting right
5486 // up to the very end of it, then we don't actually care about the tail.
5487 if (NumSubElts + Index >= (int)NumElts)
5488 Policy |= RISCVVType::TAIL_AGNOSTIC;
5489
5490 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5491 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5492 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5493
5494 SDValue Res;
5495 // If we're inserting into the lowest elements, use a tail undisturbed
5496 // vmv.v.v.
5497 if (Index == 0)
5498 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5499 VL);
5500 else
5501 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5502 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5503 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5504 }
5505
5506 // A shuffle of shuffles where the final data only is drawn from 2 input ops
5507 // can be compressed into a single shuffle
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5509 const RISCVSubtarget &Subtarget,
5510 SelectionDAG &DAG) {
5511 SDValue V1 = SVN->getOperand(0);
5512 SDValue V2 = SVN->getOperand(1);
5513
// NOTE(review): the condition's second line (the check on V2's opcode) is
// missing from this copy of the file.
5514 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE ||
5516 return SDValue();
5517
// Only fold when the inner shuffles have no other users, so we don't
// duplicate work.
5518 if (!V1.hasOneUse() || !V2.hasOneUse())
5519 return SDValue();
5520
5521 ArrayRef<int> Mask = SVN->getMask();
5522 ArrayRef<int> V1Mask = cast<ShuffleVectorSDNode>(V1.getNode())->getMask();
5523 ArrayRef<int> V2Mask = cast<ShuffleVectorSDNode>(V2.getNode())->getMask();
5524 unsigned NumElts = Mask.size();
// Compose the outer mask with the inner masks, tracking which inner
// operand each element ultimately comes from.
5525 SmallVector<int> NewMask(NumElts, -1);
5526 for (unsigned Idx : seq<unsigned>(NumElts)) {
5527 int Lane = Mask[Idx];
5528 // Don't assign if poison
5529 if (Lane == -1)
5530 continue;
5531 int OrigLane;
5532 bool SecondOp = false;
5533 if ((unsigned)Lane < NumElts) {
5534 OrigLane = V1Mask[Lane];
5535 } else {
5536 OrigLane = V2Mask[Lane - NumElts];
5537 SecondOp = true;
5538 }
5539 if (OrigLane == -1)
5540 continue;
5541 // Don't handle if shuffling from a second operand
5542 if ((unsigned)OrigLane >= NumElts)
5543 return SDValue();
5544 if (SecondOp)
5545 OrigLane += NumElts;
5546 NewMask[Idx] = OrigLane;
5547 }
5548
5549 MVT VT = SVN->getSimpleValueType(0);
5550 SDLoc DL(SVN);
5551
5552 return DAG.getVectorShuffle(VT, DL, V1->getOperand(0), V2->getOperand(0),
5553 NewMask);
5554 }
5555
5556 /// Match v(f)slide1up/down idioms. These operations involve sliding
5557 /// N-1 elements to make room for an inserted scalar at one end.
// NOTE(review): the first line of the signature (taking DL and VT) is
// missing from this copy of the file — confirm against upstream.
5559 SDValue V1, SDValue V2,
5560 ArrayRef<int> Mask,
5561 const RISCVSubtarget &Subtarget,
5562 SelectionDAG &DAG) {
// Canonicalize so V1 is the BUILD_VECTOR providing the inserted scalar.
5563 bool OpsSwapped = false;
5564 if (!isa<BuildVectorSDNode>(V1)) {
5565 if (!isa<BuildVectorSDNode>(V2))
5566 return SDValue();
5567 std::swap(V1, V2);
5568 OpsSwapped = true;
5569 }
5570 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5571 if (!Splat)
5572 return SDValue();
5573
5574 // Return true if the mask could describe a slide of Mask.size() - 1
5575 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5576 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5577 const unsigned S = (Offset > 0) ? 0 : -Offset;
5578 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5579 for (unsigned i = S; i != E; ++i)
5580 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5581 return false;
5582 return true;
5583 };
5584
5585 const unsigned NumElts = VT.getVectorNumElements();
5586 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5587 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5588 return SDValue();
5589
5590 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5591 // Inserted lane must come from splat, undef scalar is legal but not profitable.
5592 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5593 return SDValue();
5594
5595 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5596 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5597
5598 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5599 // vslide1{down,up}.vx instead.
5600 if (VT.getVectorElementType() == MVT::bf16 ||
5601 (VT.getVectorElementType() == MVT::f16 &&
5602 !Subtarget.hasVInstructionsF16())) {
5603 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5604 Splat =
5605 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5606 V2 = DAG.getBitcast(
5607 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5608 SDValue Vec = DAG.getNode(
5609 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5610 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5611 Vec = DAG.getBitcast(ContainerVT, Vec);
5612 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5613 }
5614
// Pick the FP or integer slide1 opcode; integer scalars are any-extended to
// XLen first since the .vx forms take an XLen-width scalar.
5615 auto OpCode = IsVSlidedown ?
5616 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5617 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5618 if (!VT.isFloatingPoint())
5619 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5620 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5621 DAG.getUNDEF(ContainerVT),
5622 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5623 Splat, TrueMask, VL);
5624 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5625 }
5626
5627 /// Match a mask which "spreads" the leading elements of a vector evenly
5628 /// across the result. Factor is the spread amount, and Index is the
5629 /// offset applied. (on success, Index < Factor) This is the inverse
5630 /// of a deinterleave with the same Factor and Index. This is analogous
5631 /// to an interleave, except that all but one lane is undef.
// NOTE(review): the first line of the signature (taking the Mask and Factor)
// is missing from this copy of the file — confirm against upstream.
5633 unsigned &Index) {
// Record which of the Factor lane positions is entirely undef across the
// whole mask.
5634 SmallVector<bool> LaneIsUndef(Factor, true);
5635 for (unsigned i = 0; i < Mask.size(); i++)
5636 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5637
// Exactly one lane position may be defined; it becomes the spread Index.
5638 bool Found = false;
5639 for (unsigned i = 0; i < Factor; i++) {
5640 if (LaneIsUndef[i])
5641 continue;
5642 if (Found)
5643 return false;
5644 Index = i;
5645 Found = true;
5646 }
5647 if (!Found)
5648 return false;
5649
// The defined lane must read the leading source elements in order.
5650 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5651 unsigned j = i * Factor + Index;
5652 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5653 return false;
5654 }
5655 return true;
5656 }
5657
// Lower one of the XRivosVizip zip/unzip operations (Opc) on Op0/Op1,
// handling fixed-length operands by round-tripping through container types
// and performing the operation on integer-typed vectors.
5658 static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5659 const SDLoc &DL, SelectionDAG &DAG,
5660 const RISCVSubtarget &Subtarget) {
5661 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5662 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5663 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5665
5666 MVT VT = Op0.getSimpleValueType();
// NOTE(review): the declaration of IntVT is missing from this copy of the
// file; it is presumably VT's integer-element equivalent — confirm upstream.
5668 Op0 = DAG.getBitcast(IntVT, Op0);
5669 Op1 = DAG.getBitcast(IntVT, Op1);
5670
5671 MVT ContainerVT = IntVT;
5672 if (VT.isFixedLengthVector()) {
5673 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5674 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5675 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5676 }
5677
// For a single-operand unzip wider than M1, operate on the two halves of
// Op0 at half the register-group size to reduce the operation's LMUL.
5678 MVT InnerVT = ContainerVT;
5679 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5680 if (Op1.isUndef() &&
5681 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5682 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5683 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5684 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5685 Subtarget.getXLenVT());
5686 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5687 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5688 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5689 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5690 }
5691
5692 SDValue Passthru = DAG.getUNDEF(InnerVT);
5693 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
// Widen back up, convert out of the container type, and restore the
// original (possibly FP) element type.
5694 if (InnerVT.bitsLT(ContainerVT))
5695 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5696 if (IntVT.isFixedLengthVector())
5697 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5698 Res = DAG.getBitcast(VT, Res);
5699 return Res;
5700 }
5701
5702 // Given a vector a, b, c, d return a vector Factor times longer
5703 // with Factor-1 undef's between elements. Ex:
5704 // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5705 // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5706 static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5707 const SDLoc &DL, SelectionDAG &DAG) {
5708
5709 MVT VT = V.getSimpleValueType();
5710 unsigned EltBits = VT.getScalarSizeInBits();
// NOTE(review): the declaration of EC (element count, used below) is missing
// from this copy of the file — confirm against upstream.
5712 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5713
// Zero-extend to Factor-times-wider elements, then shift the payload into
// lane position Index within each wide element.
5714 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5715
5716 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5717 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5718 // allow the SHL to fold away if Index is 0.
5719 if (Index != 0)
5720 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5721 DAG.getConstant(EltBits * Index, DL, WideVT));
5722 // Make sure to use original element type
// NOTE(review): the first line of the ResultVT declaration is missing from
// this copy of the file — confirm against upstream.
5724 EC.multiplyCoefficientBy(Factor));
5725 return DAG.getBitcast(ResultVT, Result);
5726 }
5727
5728 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5729 // to create an interleaved vector of <[vscale x] n*2 x ty>.
5730 // This requires that the size of ty is less than the subtarget's maximum ELEN.
// NOTE(review): the first line of the signature (taking EvenV and OddV) is
// missing from this copy of the file — confirm against upstream.
5732 const SDLoc &DL, SelectionDAG &DAG,
5733 const RISCVSubtarget &Subtarget) {
5734
5735 // FIXME: Not only does this optimize the code, it fixes some correctness
5736 // issues because MIR does not have freeze.
5737 if (EvenV.isUndef())
5738 return getWideningSpread(OddV, 2, 1, DL, DAG);
5739 if (OddV.isUndef())
5740 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5741
5742 MVT VecVT = EvenV.getSimpleValueType();
5743 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5744 // Convert fixed vectors to scalable if needed
5745 if (VecContainerVT.isFixedLengthVector()) {
5746 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5747 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5748 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5749 }
5750
5751 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5752
5753 // We're working with a vector of the same size as the resulting
5754 // interleaved vector, but with half the number of elements and
5755 // twice the SEW (Hence the restriction on not using the maximum
5756 // ELEN)
// NOTE(review): the first line of the WideVT declaration (the double-width
// integer element type) is missing from this copy of the file.
5757 MVT WideVT =
5759 VecVT.getVectorElementCount());
5760 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5761 if (WideContainerVT.isFixedLengthVector())
5762 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5763
5764 // Bitcast the input vectors to integers in case they are FP
5765 VecContainerVT = VecContainerVT.changeTypeToInteger();
5766 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5767 OddV = DAG.getBitcast(VecContainerVT, OddV);
5768
5769 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5770 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5771
5772 SDValue Interleaved;
5773 if (Subtarget.hasStdExtZvbb()) {
5774 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5775 SDValue OffsetVec =
5776 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5777 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5778 OffsetVec, Passthru, Mask, VL);
5779 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5780 Interleaved, EvenV, Passthru, Mask, VL);
5781 } else {
5782 // FIXME: We should freeze the odd vector here. We already handled the case
5783 // of provably undef/poison above.
5784
5785 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5786 // vwaddu.vv
5787 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5788 OddV, Passthru, Mask, VL);
5789
5790 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
5791 SDValue AllOnesVec = DAG.getSplatVector(
5792 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5793 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5794 OddV, AllOnesVec, Passthru, Mask, VL);
5795
5796 // Add the two together so we get
5797 // (OddV * 0xff...ff) + (OddV + EvenV)
5798 // = (OddV * 0x100...00) + EvenV
5799 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5800 // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
5801 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5802 Interleaved, OddsMul, Passthru, Mask, VL);
5803 }
5804
5805 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5806 MVT ResultContainerVT = MVT::getVectorVT(
5807 VecVT.getVectorElementType(), // Make sure to use original type
5808 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5809 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5810
5811 // Convert back to a fixed vector if needed
// NOTE(review): the continuation of the ResultVT declaration is missing from
// this copy of the file — confirm against upstream.
5812 MVT ResultVT =
5815 if (ResultVT.isFixedLengthVector())
5816 Interleaved =
5817 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5818
5819 return Interleaved;
5820 }
5821
5822 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
5823 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
// NOTE(review): the first line of the signature (taking the shuffle node
// SVN) is missing from this copy of the file — confirm against upstream.
5825 SelectionDAG &DAG,
5826 const RISCVSubtarget &Subtarget) {
5827 SDLoc DL(SVN);
5828 MVT VT = SVN->getSimpleValueType(0);
5829 SDValue V = SVN->getOperand(0);
5830 unsigned NumElts = VT.getVectorNumElements();
5831
5832 assert(VT.getVectorElementType() == MVT::i1);
5833
// NOTE(review): the first line of this condition (presumably a reverse-mask
// check on SVN's mask) is missing from this copy of the file.
5835 SVN->getMask().size()) ||
5836 !SVN->getOperand(1).isUndef())
5837 return SDValue();
5838
// Choose a single-element integer vector wide enough to hold all the bits
// (at least 8), plus the matching i1 vector type for the bitcasts.
5839 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5840 EVT ViaVT = EVT::getVectorVT(
5841 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5842 EVT ViaBitVT =
5843 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5844
5845 // If we don't have zvbb or the larger element type > ELEN, the operation will
5846 // be illegal.
// NOTE(review): the first line of this legality check (presumably an
// isOperationLegalOrCustom query for BITREVERSE) is missing from this copy.
5848 ViaVT) ||
5849 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5850 return SDValue();
5851
5852 // If the bit vector doesn't fit exactly into the larger element type, we need
5853 // to insert it into the larger vector and then shift up the reversed bits
5854 // afterwards to get rid of the gap introduced.
5855 if (ViaEltSize > NumElts)
5856 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5857
5858 SDValue Res =
5859 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5860
5861 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5862 // element type.
5863 if (ViaEltSize > NumElts)
5864 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5865 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5866
5867 Res = DAG.getBitcast(ViaBitVT, Res);
5868
5869 if (ViaEltSize > NumElts)
5870 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5871 return Res;
5872 }
5873
5875 const RISCVSubtarget &Subtarget,
5876 MVT &RotateVT, unsigned &RotateAmt) {
5877 unsigned NumElts = VT.getVectorNumElements();
5878 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5879 unsigned NumSubElts;
5880 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5881 NumElts, NumSubElts, RotateAmt))
5882 return false;
5883 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5884 NumElts / NumSubElts);
5885
5886 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5887 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5888}
5889
5890// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5891// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5892// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5894 SelectionDAG &DAG,
5895 const RISCVSubtarget &Subtarget) {
5896 SDLoc DL(SVN);
5897
5898 EVT VT = SVN->getValueType(0);
5899 unsigned RotateAmt;
5900 MVT RotateVT;
5901 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5902 return SDValue();
5903
5904 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5905
5906 SDValue Rotate;
5907 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5908 // so canonicalize to vrev8.
5909 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5910 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5911 else
5912 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5913 DAG.getConstant(RotateAmt, DL, RotateVT));
5914
5915 return DAG.getBitcast(VT, Rotate);
5916}
5917
5918// If compiling with an exactly known VLEN, see if we can split a
5919// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
5922 SelectionDAG &DAG,
5923 const RISCVSubtarget &Subtarget) {
5924 SDLoc DL(SVN);
5925 MVT VT = SVN->getSimpleValueType(0);
5926 SDValue V1 = SVN->getOperand(0);
5927 SDValue V2 = SVN->getOperand(1);
5928 ArrayRef<int> Mask = SVN->getMask();
5929
5930 // If we don't know exact data layout, not much we can do. If this
5931 // is already m1 or smaller, no point in splitting further.
5932 const auto VLen = Subtarget.getRealVLen();
5933 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5934 return SDValue();
5935
5936 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5937 // expansion for.
5938 unsigned RotateAmt;
5939 MVT RotateVT;
5940 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5941 return SDValue();
5942
5943 MVT ElemVT = VT.getVectorElementType();
5944 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5945
5946 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5947 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5948 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5949 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5950 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5951 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5952 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5953 unsigned NumOfDestRegs = NumElts / NumOpElts;
5954 // The following semantically builds up a fixed length concat_vector
5955 // of the component shuffle_vectors. We eagerly lower to scalable here
5956 // to avoid DAG combining it back to a large shuffle_vector again.
5957 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5958 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5960 Operands;
5962 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5963 [&]() { Operands.emplace_back(); },
5964 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5965 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5966 SmallVector<int>(SrcSubMask));
5967 },
5968 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5969 if (NewReg)
5970 Operands.emplace_back();
5971 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5972 });
5973 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5974 // Note: check that we do not emit too many shuffles here to prevent code
5975 // size explosion.
5976 // TODO: investigate, if it can be improved by extra analysis of the masks to
5977 // check if the code is more profitable.
5978 unsigned NumShuffles = std::accumulate(
5979 Operands.begin(), Operands.end(), 0u,
5980 [&](unsigned N,
5981 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5982 if (Data.empty())
5983 return N;
5984 N += Data.size();
5985 for (const auto &P : Data) {
5986 unsigned Idx2 = std::get<1>(P);
5987 ArrayRef<int> Mask = std::get<2>(P);
5988 if (Idx2 != UINT_MAX)
5989 ++N;
5990 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5991 --N;
5992 }
5993 return N;
5994 });
5995 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5996 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5997 return SDValue();
5998 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5999 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
6000 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
6001 return SubVec;
6002 };
6003 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
6005 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
6006 return SubVec;
6007 };
6008 SDValue Vec = DAG.getUNDEF(ContainerVT);
6009 for (auto [I, Data] : enumerate(Operands)) {
6010 if (Data.empty())
6011 continue;
6013 for (unsigned I : seq<unsigned>(Data.size())) {
6014 const auto &[Idx1, Idx2, _] = Data[I];
6015 // If the shuffle contains permutation of odd number of elements,
6016 // Idx1 might be used already in the first iteration.
6017 //
6018 // Idx1 = shuffle Idx1, Idx2
6019 // Idx1 = shuffle Idx1, Idx3
6020 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
6021 if (!V)
6022 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
6023 (Idx1 % NumOfSrcRegs) * NumOpElts);
6024 if (Idx2 != UINT_MAX) {
6025 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
6026 if (!V)
6027 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
6028 (Idx2 % NumOfSrcRegs) * NumOpElts);
6029 }
6030 }
6031 SDValue V;
6032 for (const auto &[Idx1, Idx2, Mask] : Data) {
6033 SDValue V1 = Values.at(Idx1);
6034 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
6035 V = PerformShuffle(V1, V2, Mask);
6036 Values[Idx1] = V;
6037 }
6038
6039 unsigned InsertIdx = I * NumOpElts;
6040 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
6041 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
6042 }
6043 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
6044}
6045
6046// Matches a subset of compress masks with a contiguous prefix of output
6047// elements. This could be extended to allow gaps by deciding which
6048// source elements to spuriously demand.
6050 int Last = -1;
6051 bool SawUndef = false;
6052 for (const auto &[Idx, M] : enumerate(Mask)) {
6053 if (M == -1) {
6054 SawUndef = true;
6055 continue;
6056 }
6057 if (SawUndef)
6058 return false;
6059 if (Idx > (unsigned)M)
6060 return false;
6061 if (M <= Last)
6062 return false;
6063 Last = M;
6064 }
6065 return true;
6066}
6067
6068/// Given a shuffle where the indices are disjoint between the two sources,
6069/// e.g.:
6070///
6071/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
6072///
6073/// Merge the two sources into one and do a single source shuffle:
6074///
6075/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
6076/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
6077///
6078/// A vselect will either be merged into a masked instruction or be lowered as a
6079/// vmerge.vvm, which is cheaper than a vrgather.vv.
6081 SelectionDAG &DAG,
6082 const RISCVSubtarget &Subtarget) {
6083 MVT VT = SVN->getSimpleValueType(0);
6084 MVT XLenVT = Subtarget.getXLenVT();
6085 SDLoc DL(SVN);
6086
6087 const ArrayRef<int> Mask = SVN->getMask();
6088
6089 // Work out which source each lane will come from.
6090 SmallVector<int, 16> Srcs(Mask.size(), -1);
6091
6092 for (int Idx : Mask) {
6093 if (Idx == -1)
6094 continue;
6095 unsigned SrcIdx = Idx % Mask.size();
6096 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
6097 if (Srcs[SrcIdx] == -1)
6098 // Mark this source as using this lane.
6099 Srcs[SrcIdx] = Src;
6100 else if (Srcs[SrcIdx] != Src)
6101 // The other source is using this lane: not disjoint.
6102 return SDValue();
6103 }
6104
6105 SmallVector<SDValue> SelectMaskVals;
6106 for (int Lane : Srcs) {
6107 if (Lane == -1)
6108 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
6109 else
6110 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
6111 }
6112 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
6113 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
6114 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
6115 SVN->getOperand(0), SVN->getOperand(1));
6116
6117 // Move all indices relative to the first source.
6118 SmallVector<int> NewMask(Mask.size());
6119 for (unsigned I = 0; I < Mask.size(); I++) {
6120 if (Mask[I] == -1)
6121 NewMask[I] = -1;
6122 else
6123 NewMask[I] = Mask[I] % Mask.size();
6124 }
6125
6126 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
6127}
6128
6129/// Is this mask local (i.e. elements only move within their local span), and
6130/// repeating (that is, the same rearrangement is being done within each span)?
6131static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
6132 // Require a prefix from the original mask until the consumer code
6133 // is adjusted to rewrite the mask instead of just taking a prefix.
6134 for (auto [I, M] : enumerate(Mask)) {
6135 if (M == -1)
6136 continue;
6137 if ((M / Span) != (int)(I / Span))
6138 return false;
6139 int SpanIdx = I % Span;
6140 int Expected = M % Span;
6141 if (Mask[SpanIdx] != Expected)
6142 return false;
6143 }
6144 return true;
6145}
6146
6147/// Is this mask only using elements from the first span of the input?
6148static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
6149 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
6150}
6151
6152/// Return true for a mask which performs an arbitrary shuffle within the first
6153/// span, and then repeats that same result across all remaining spans. Note
6154/// that this doesn't check if all the inputs come from a single span!
6155static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
6156 // Require a prefix from the original mask until the consumer code
6157 // is adjusted to rewrite the mask instead of just taking a prefix.
6158 for (auto [I, M] : enumerate(Mask)) {
6159 if (M == -1)
6160 continue;
6161 int SpanIdx = I % Span;
6162 if (Mask[SpanIdx] != M)
6163 return false;
6164 }
6165 return true;
6166}
6167
6168/// Try to widen element type to get a new mask value for a better permutation
6169/// sequence. This doesn't try to inspect the widened mask for profitability;
6170/// we speculate the widened form is equal or better. This has the effect of
6171/// reducing mask constant sizes - allowing cheaper materialization sequences
6172/// - and index sequence sizes - reducing register pressure and materialization
6173/// cost, at the cost of (possibly) an extra VTYPE toggle.
6175 SDLoc DL(Op);
6176 MVT VT = Op.getSimpleValueType();
6177 MVT ScalarVT = VT.getVectorElementType();
6178 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
6179 SDValue V0 = Op.getOperand(0);
6180 SDValue V1 = Op.getOperand(1);
6181 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
6182
6183 // Avoid wasted work leading to isTypeLegal check failing below
6184 if (ElementSize > 32)
6185 return SDValue();
6186
6187 SmallVector<int, 8> NewMask;
6188 if (!widenShuffleMaskElts(Mask, NewMask))
6189 return SDValue();
6190
6191 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
6192 : MVT::getIntegerVT(ElementSize * 2);
6193 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
6194 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
6195 return SDValue();
6196 V0 = DAG.getBitcast(NewVT, V0);
6197 V1 = DAG.getBitcast(NewVT, V1);
6198 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
6199}
6200
6202 const RISCVSubtarget &Subtarget) {
6203 SDValue V1 = Op.getOperand(0);
6204 SDValue V2 = Op.getOperand(1);
6205 SDLoc DL(Op);
6206 MVT XLenVT = Subtarget.getXLenVT();
6207 MVT VT = Op.getSimpleValueType();
6208 unsigned NumElts = VT.getVectorNumElements();
6210
6211 if (VT.getVectorElementType() == MVT::i1) {
6212 // Lower to a vror.vi of a larger element type if possible before we promote
6213 // i1s to i8s.
6214 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6215 return V;
6216 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
6217 return V;
6218
6219 // Promote i1 shuffle to i8 shuffle.
6220 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
6221 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
6222 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
6223 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
6224 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
6225 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
6226 ISD::SETNE);
6227 }
6228
6229 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6230
6231 // Store the return value in a single variable instead of structured bindings
6232 // so that we can pass it to GetSlide below, which cannot capture structured
6233 // bindings until C++20.
6234 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6235 auto [TrueMask, VL] = TrueMaskVL;
6236
6237 if (SVN->isSplat()) {
6238 const int Lane = SVN->getSplatIndex();
6239 if (Lane >= 0) {
6240 MVT SVT = VT.getVectorElementType();
6241
6242 // Turn splatted vector load into a strided load with an X0 stride.
6243 SDValue V = V1;
6244 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
6245 // with undef.
6246 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
6247 int Offset = Lane;
6248 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
6249 int OpElements =
6250 V.getOperand(0).getSimpleValueType().getVectorNumElements();
6251 V = V.getOperand(Offset / OpElements);
6252 Offset %= OpElements;
6253 }
6254
6255 // We need to ensure the load isn't atomic or volatile.
6256 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
6257 auto *Ld = cast<LoadSDNode>(V);
6258 Offset *= SVT.getStoreSize();
6259 SDValue NewAddr = DAG.getMemBasePlusOffset(
6260 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
6261
6262 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
6263 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
6264 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6265 SDValue IntID =
6266 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
6267 SDValue Ops[] = {Ld->getChain(),
6268 IntID,
6269 DAG.getUNDEF(ContainerVT),
6270 NewAddr,
6271 DAG.getRegister(RISCV::X0, XLenVT),
6272 VL};
6273 SDValue NewLoad = DAG.getMemIntrinsicNode(
6274 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
6276 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
6277 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
6278 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
6279 }
6280
6281 MVT SplatVT = ContainerVT;
6282
6283 // f16 with zvfhmin and bf16 need to use an integer scalar load.
6284 if (SVT == MVT::bf16 ||
6285 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
6286 SVT = MVT::i16;
6287 SplatVT = ContainerVT.changeVectorElementType(SVT);
6288 }
6289
6290 // Otherwise use a scalar load and splat. This will give the best
6291 // opportunity to fold a splat into the operation. ISel can turn it into
6292 // the x0 strided load if we aren't able to fold away the select.
6293 if (SVT.isFloatingPoint())
6294 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
6295 Ld->getPointerInfo().getWithOffset(Offset),
6296 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6297 else
6298 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
6299 Ld->getPointerInfo().getWithOffset(Offset), SVT,
6300 Ld->getBaseAlign(),
6301 Ld->getMemOperand()->getFlags());
6303
6304 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
6305 : RISCVISD::VMV_V_X_VL;
6306 SDValue Splat =
6307 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
6308 Splat = DAG.getBitcast(ContainerVT, Splat);
6309 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
6310 }
6311
6312 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6313 assert(Lane < (int)NumElts && "Unexpected lane!");
6314 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
6315 V1, DAG.getConstant(Lane, DL, XLenVT),
6316 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6317 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6318 }
6319 }
6320
6321 // For exact VLEN m2 or greater, try to split to m1 operations if we
6322 // can split cleanly.
6323 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
6324 return V;
6325
6326 ArrayRef<int> Mask = SVN->getMask();
6327
6328 if (SDValue V =
6329 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
6330 return V;
6331
6332 if (SDValue V =
6333 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
6334 return V;
6335
6336 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
6337 // available.
6338 if (Subtarget.hasStdExtZvkb())
6339 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6340 return V;
6341
6342 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
6343 NumElts != 2)
6344 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
6345
6346 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
6347 // use shift and truncate to perform the shuffle.
6348 // TODO: For Factor=6, we can perform the first step of the deinterleave via
6349 // shift-and-trunc reducing total cost for everything except an mf8 result.
6350 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
6351 // to do the entire operation.
6352 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6353 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6354 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6355 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
6356 unsigned Index = 0;
6357 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
6358 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6359 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
6360 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
6361 if (1 < count_if(Mask,
6362 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6363 1 < count_if(Mask, [&Mask](int Idx) {
6364 return Idx >= (int)Mask.size();
6365 })) {
6366 // Narrow each source and concatenate them.
6367 // FIXME: For small LMUL it is better to concatenate first.
6368 MVT EltVT = VT.getVectorElementType();
6369 auto EltCnt = VT.getVectorElementCount();
6370 MVT SubVT =
6371 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
6372
6373 SDValue Lo =
6374 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
6375 SDValue Hi =
6376 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
6377
6378 SDValue Concat =
6381 if (Factor == 2)
6382 return Concat;
6383
6384 SDValue Vec = DAG.getUNDEF(VT);
6385 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6386 }
6387 }
6388 }
6389 }
6390
6391 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6392 // e64 which can't match above.
6393 unsigned Index = 0;
6394 if (Subtarget.hasVendorXRivosVizip() &&
6396 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6397 unsigned Opc =
6398 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6399 if (V2.isUndef())
6400 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6401 if (auto VLEN = Subtarget.getRealVLen();
6402 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6403 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6404 if (SDValue Src = foldConcatVector(V1, V2)) {
6405 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6406 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6407 SDValue Res =
6408 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6409 return DAG.getExtractSubvector(DL, VT, Res, 0);
6410 }
6411 // Deinterleave each source and concatenate them, or concat first, then
6412 // deinterleave.
6413 if (1 < count_if(Mask,
6414 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6415 1 < count_if(Mask,
6416 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6417
6418 const unsigned EltSize = VT.getScalarSizeInBits();
6419 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6420 if (NumElts < MinVLMAX) {
6421 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6422 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6423 SDValue Res =
6424 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6425 return DAG.getExtractSubvector(DL, VT, Res, 0);
6426 }
6427
6428 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6429 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6430
6431 MVT SubVT = VT.getHalfNumVectorElementsVT();
6432 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6433 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6434 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6435 }
6436 }
6437
6438 if (SDValue V =
6439 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6440 return V;
6441
6442 // Detect an interleave shuffle and lower to
6443 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6444 int EvenSrc, OddSrc;
6445 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6446 !(NumElts == 2 &&
6447 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6448 // Extract the halves of the vectors.
6449 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6450
6451 // Recognize if one half is actually undef; the matching above will
6452 // otherwise reuse the even stream for the undef one. This improves
6453 // spread(2) shuffles.
6454 bool LaneIsUndef[2] = { true, true};
6455 for (const auto &[Idx, M] : enumerate(Mask))
6456 LaneIsUndef[Idx % 2] &= (M == -1);
6457
6458 int Size = Mask.size();
6459 SDValue EvenV, OddV;
6460 if (LaneIsUndef[0]) {
6461 EvenV = DAG.getUNDEF(HalfVT);
6462 } else {
6463 assert(EvenSrc >= 0 && "Undef source?");
6464 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6465 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6466 }
6467
6468 if (LaneIsUndef[1]) {
6469 OddV = DAG.getUNDEF(HalfVT);
6470 } else {
6471 assert(OddSrc >= 0 && "Undef source?");
6472 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6473 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6474 }
6475
6476 // Prefer vzip2a if available.
6477 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6478 if (Subtarget.hasVendorXRivosVizip()) {
6479 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6480 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6481 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6482 }
6483 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6484 }
6485
6486 // Recognize a pattern which can handled via a pair of vslideup/vslidedown
6487 // instructions (in any combination) with masking on the second instruction.
6488 // Also handles masked slides into an identity source, and single slides
6489 // without masking. Avoid matching bit rotates (which are not also element
6490 // rotates) as slide pairs. This is a performance heuristic, not a
6491 // functional check.
6492 std::array<std::pair<int, int>, 2> SrcInfo;
6493 unsigned RotateAmt;
6494 MVT RotateVT;
6495 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6496 (isElementRotate(SrcInfo, NumElts) ||
6497 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6498 SDValue Sources[2];
6499 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6500 int SrcIdx = Info.first;
6501 assert(SrcIdx == 0 || SrcIdx == 1);
6502 SDValue &Src = Sources[SrcIdx];
6503 if (!Src) {
6504 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6505 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6506 }
6507 return Src;
6508 };
6509 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6510 SDValue Passthru) {
6511 auto [TrueMask, VL] = TrueMaskVL;
6512 SDValue SrcV = GetSourceFor(Src);
6513 int SlideAmt = Src.second;
6514 if (SlideAmt == 0) {
6515 // Should never be second operation
6516 assert(Mask == TrueMask);
6517 return SrcV;
6518 }
6519 if (SlideAmt < 0)
6520 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6521 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6523 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6524 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6526 };
6527
6528 if (SrcInfo[1].first == -1) {
6529 SDValue Res = DAG.getUNDEF(ContainerVT);
6530 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6531 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6532 }
6533
6534 if (Subtarget.hasVendorXRivosVizip()) {
6535 bool TryWiden = false;
6536 unsigned Factor;
6537 if (isZipEven(SrcInfo, Mask, Factor)) {
6538 if (Factor == 1) {
6539 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6540 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6541 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6542 Subtarget);
6543 }
6544 TryWiden = true;
6545 }
6546 if (isZipOdd(SrcInfo, Mask, Factor)) {
6547 if (Factor == 1) {
6548 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6549 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6550 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6551 Subtarget);
6552 }
6553 TryWiden = true;
6554 }
    // If we found a widening opportunity which would let us form a
6556 // zipeven or zipodd, use the generic code to widen the shuffle
6557 // and recurse through this logic.
6558 if (TryWiden)
6559 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6560 return V;
6561 }
6562
6563 // Build the mask. Note that vslideup unconditionally preserves elements
6564 // below the slide amount in the destination, and thus those elements are
6565 // undefined in the mask. If the mask ends up all true (or undef), it
6566 // will be folded away by general logic.
6567 SmallVector<SDValue> MaskVals;
6568 for (const auto &[Idx, M] : enumerate(Mask)) {
6569 if (M < 0 ||
6570 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6571 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6572 continue;
6573 }
6574 int Src = M >= (int)NumElts;
6575 int Diff = (int)Idx - (M % NumElts);
6576 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6577 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6578 "Must match exactly one of the two slides");
6579 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6580 }
6581 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6582 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6583 SDValue SelectMask = convertToScalableVector(
6584 ContainerVT.changeVectorElementType(MVT::i1),
6585 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6586
6587 SDValue Res = DAG.getUNDEF(ContainerVT);
6588 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6589 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6590 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6591 }
6592
6593 // Handle any remaining single source shuffles
6594 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6595 if (V2.isUndef()) {
6596 // We might be able to express the shuffle as a bitrotate. But even if we
6597 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6598 // shifts and a vor will have a higher throughput than a vrgather.
6599 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6600 return V;
6601
6602 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6603 return V;
6604
6605 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6606 // is fully covered in interleave(2) above, so it is ignored here.
6607 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6608 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6609 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6610 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6611 unsigned Index;
6612 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6613 MVT NarrowVT =
6614 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6615 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6616 return getWideningSpread(Src, Factor, Index, DL, DAG);
6617 }
6618 }
6619 }
6620
6621 // If only a prefix of the source elements influence a prefix of the
6622 // destination elements, try to see if we can reduce the required LMUL
6623 unsigned MinVLen = Subtarget.getRealMinVLen();
6624 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6625 if (NumElts > MinVLMAX) {
6626 unsigned MaxIdx = 0;
6627 for (auto [I, M] : enumerate(Mask)) {
6628 if (M == -1)
6629 continue;
6630 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6631 }
6632 unsigned NewNumElts =
6633 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6634 if (NewNumElts != NumElts) {
6635 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6636 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6637 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6638 Mask.take_front(NewNumElts));
6639 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6640 }
6641 }
6642
6643 // Before hitting generic lowering fallbacks, try to widen the mask
6644 // to a wider SEW.
6645 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6646 return V;
6647
6648 // Can we generate a vcompress instead of a vrgather? These scale better
6649 // at high LMUL, at the cost of not being able to fold a following select
6650 // into them. The mask constants are also smaller than the index vector
6651 // constants, and thus easier to materialize.
6652 if (isCompressMask(Mask)) {
6653 SmallVector<SDValue> MaskVals(NumElts,
6654 DAG.getConstant(false, DL, XLenVT));
6655 for (auto Idx : Mask) {
6656 if (Idx == -1)
6657 break;
6658 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6659 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6660 }
6661 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6662 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6663 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6664 DAG.getUNDEF(VT));
6665 }
6666
6667 if (VT.getScalarSizeInBits() == 8 &&
6668 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6669 // On such a vector we're unable to use i8 as the index type.
6670 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6671 // may involve vector splitting if we're already at LMUL=8, or our
6672 // user-supplied maximum fixed-length LMUL.
6673 return SDValue();
6674 }
6675
6676 // Base case for the two operand recursion below - handle the worst case
6677 // single source shuffle.
6678 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6679 MVT IndexVT = VT.changeTypeToInteger();
6680 // Since we can't introduce illegal index types at this stage, use i16 and
6681 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6682 // than XLenVT.
6683 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6684 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6685 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6686 }
6687
6688 // If the mask allows, we can do all the index computation in 16 bits. This
6689 // requires less work and less register pressure at high LMUL, and creates
6690 // smaller constants which may be cheaper to materialize.
6691 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6692 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6693 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6694 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6695 }
6696
6697 MVT IndexContainerVT =
6698 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6699
6700 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6701 SmallVector<SDValue> GatherIndicesLHS;
6702 for (int MaskIndex : Mask) {
6703 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6704 GatherIndicesLHS.push_back(IsLHSIndex
6705 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6706 : DAG.getUNDEF(XLenVT));
6707 }
6708 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6709 LHSIndices =
6710 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6711 // At m1 and less, there's no point trying any of the high LMUL splitting
6712 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6713 if (NumElts <= MinVLMAX) {
6714 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6715 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6716 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6717 }
6718
6719 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6720 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6721 auto [InnerTrueMask, InnerVL] =
6722 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6723 int N =
6724 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6725 assert(isPowerOf2_32(N) && N <= 8);
6726
6727 // If we have a locally repeating mask, then we can reuse the first
6728 // register in the index register group for all registers within the
6729 // source register group. TODO: This generalizes to m2, and m4.
6730 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6731 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6732 SDValue Gather = DAG.getUNDEF(ContainerVT);
6733 for (int i = 0; i < N; i++) {
6734 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6735 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6736 SDValue SubVec =
6737 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6738 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6739 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6740 }
6741 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6742 }
6743
6744 // If we have a shuffle which only uses the first register in our source
6745 // register group, and repeats the same index across all spans, we can
6746 // use a single vrgather (and possibly some register moves).
6747 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6748 // which we can do a linear number of shuffles to form an m1 which
6749 // contains all the output elements.
6750 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6751 isSpanSplatShuffle(Mask, MinVLMAX)) {
6752 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6753 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6754 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6755 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6756 SDValue Gather = DAG.getUNDEF(ContainerVT);
6757 for (int i = 0; i < N; i++)
6758 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6759 M1VT.getVectorMinNumElements() * i);
6760 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6761 }
6762
6763 // If we have a shuffle which only uses the first register in our
6764 // source register group, we can do a linear number of m1 vrgathers
6765 // reusing the same source register (but with different indices)
6766 // TODO: This can be generalized for m2 or m4, or for any shuffle
6767 // for which we can do a vslidedown followed by this expansion.
6768 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6769 SDValue SlideAmt =
6770 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6771 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6772 SDValue Gather = DAG.getUNDEF(ContainerVT);
6773 for (int i = 0; i < N; i++) {
6774 if (i != 0)
6775 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6776 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6777 SlideAmt, TrueMask, VL);
6778 SDValue SubIndex =
6779 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6780 SDValue SubVec =
6781 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6782 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6783 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6784 M1VT.getVectorMinNumElements() * i);
6785 }
6786 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6787 }
6788
6789 // Fallback to generic vrgather if we can't find anything better.
6790 // On many machines, this will be O(LMUL^2)
6791 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6792 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6793 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6794 }
6795
6796 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6797 // merged with a second vrgather.
6798 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6799
6800 // Now construct the mask that will be used by the blended vrgather operation.
6801 // Construct the appropriate indices into each vector.
6802 for (int MaskIndex : Mask) {
6803 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6804 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6805 ? MaskIndex : -1);
6806 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6807 }
6808
6809 // If the mask indices are disjoint between the two sources, we can lower it
6810 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6811 // operands may end up being lowered to something cheaper than a vrgather.vv.
6812 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6813 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6814 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6815 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6816 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6817 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6818 return V;
6819
6820 // Before hitting generic lowering fallbacks, try to widen the mask
6821 // to a wider SEW.
6822 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6823 return V;
6824
6825 // Try to pick a profitable operand order.
6826 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6827 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6828
6829 // Recursively invoke lowering for each operand if we had two
6830 // independent single source shuffles, and then combine the result via a
6831 // vselect. Note that the vselect will likely be folded back into the
6832 // second permute (vrgather, or other) by the post-isel combine.
6833 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6834 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6835
6836 SmallVector<SDValue> MaskVals;
6837 for (int MaskIndex : Mask) {
6838 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6839 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6840 }
6841
6842 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6843 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6844 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6845
6846 if (SwapOps)
6847 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6848 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6849}
6850
6852 // Only support legal VTs for other shuffles for now.
6853 if (!isTypeLegal(VT) || !Subtarget.hasVInstructions())
6854 return false;
6855
6856 // Support splats for any type. These should type legalize well.
6858 return true;
6859
6860 const unsigned NumElts = M.size();
6861 MVT SVT = VT.getSimpleVT();
6862
6863 // Not for i1 vectors.
6864 if (SVT.getScalarType() == MVT::i1)
6865 return false;
6866
6867 std::array<std::pair<int, int>, 2> SrcInfo;
6868 int Dummy1, Dummy2;
6869 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6870 (::isMaskedSlidePair(M, SrcInfo) &&
6871 isElementRotate(SrcInfo, NumElts)) ||
6872 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6873}
6874
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF (and their VP forms) by converting
// to FP and extracting the exponent: for a value with a single set bit, the
// biased FP exponent encodes log2 of that bit's position.
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);
  SDLoc DL(Op);
  MVT ContainerVT = VT;

  // For VP opcodes, the mask and EVL come in as explicit operands; for
  // non-VP opcodes they are synthesized later (only on the RTZ path below).
  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose FP type that can represent the value if possible. Otherwise, we
  // use rounding to zero conversion for correct exponent of the result.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    // VP variant of the same X & -X trick, threading the mask and EVL.
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    // The FP type is wide enough to represent the value exactly, so an
    // ordinary unsigned conversion suffices.
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    SDValue RTZRM =
        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }
  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  // Mantissa width: 52 bits for f64, 23 for f32.
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals to Adjust which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}
6993
6994SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6995 SelectionDAG &DAG) const {
6996 SDLoc DL(Op);
6997 MVT XLenVT = Subtarget.getXLenVT();
6998 SDValue Source = Op->getOperand(0);
6999 MVT SrcVT = Source.getSimpleValueType();
7000 SDValue Mask = Op->getOperand(1);
7001 SDValue EVL = Op->getOperand(2);
7002
7003 if (SrcVT.isFixedLengthVector()) {
7004 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
7005 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
7006 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7007 Subtarget);
7008 SrcVT = ContainerVT;
7009 }
7010
7011 // Convert to boolean vector.
7012 if (SrcVT.getScalarType() != MVT::i1) {
7013 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
7014 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
7015 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
7016 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
7017 DAG.getUNDEF(SrcVT), Mask, EVL});
7018 }
7019
7020 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
7021 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
7022 // In this case, we can interpret poison as -1, so nothing to do further.
7023 return Res;
7024
7025 // Convert -1 to VL.
7026 SDValue SetCC =
7027 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
7028 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
7029 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
7030}
7031
7032// While RVV has alignment restrictions, we should always be able to load as a
7033// legal equivalently-sized byte-typed vector instead. This method is
7034// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
7035// the load is already correctly-aligned, it returns SDValue().
7036SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
7037 SelectionDAG &DAG) const {
7038 auto *Load = cast<LoadSDNode>(Op);
7039 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
7040
7042 Load->getMemoryVT(),
7043 *Load->getMemOperand()))
7044 return SDValue();
7045
7046 SDLoc DL(Op);
7047 MVT VT = Op.getSimpleValueType();
7048 unsigned EltSizeBits = VT.getScalarSizeInBits();
7049 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7050 "Unexpected unaligned RVV load type");
7051 MVT NewVT =
7052 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7053 assert(NewVT.isValid() &&
7054 "Expecting equally-sized RVV vector types to be legal");
7055 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
7056 Load->getPointerInfo(), Load->getBaseAlign(),
7057 Load->getMemOperand()->getFlags());
7058 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
7059}
7060
7061// While RVV has alignment restrictions, we should always be able to store as a
7062// legal equivalently-sized byte-typed vector instead. This method is
7063// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
7064// returns SDValue() if the store is already correctly aligned.
7065SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
7066 SelectionDAG &DAG) const {
7067 auto *Store = cast<StoreSDNode>(Op);
7068 assert(Store && Store->getValue().getValueType().isVector() &&
7069 "Expected vector store");
7070
7072 Store->getMemoryVT(),
7073 *Store->getMemOperand()))
7074 return SDValue();
7075
7076 SDLoc DL(Op);
7077 SDValue StoredVal = Store->getValue();
7078 MVT VT = StoredVal.getSimpleValueType();
7079 unsigned EltSizeBits = VT.getScalarSizeInBits();
7080 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7081 "Unexpected unaligned RVV store type");
7082 MVT NewVT =
7083 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7084 assert(NewVT.isValid() &&
7085 "Expecting equally-sized RVV vector types to be legal");
7086 StoredVal = DAG.getBitcast(NewVT, StoredVal);
7087 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7088 Store->getPointerInfo(), Store->getBaseAlign(),
7089 Store->getMemOperand()->getFlags());
7090}
7091
7092// While RVV has alignment restrictions, we should always be able to load as a
7093// legal equivalently-sized byte-typed vector instead. This method is
7094// responsible for re-expressing a ISD::VP_LOAD via a correctly-aligned type. If
7095// the load is already correctly-aligned, it returns SDValue().
7096SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
7097 SelectionDAG &DAG) const {
7098 auto *Load = cast<VPLoadSDNode>(Op);
7099 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
7100
7102 Load->getMemoryVT(),
7103 *Load->getMemOperand()))
7104 return SDValue();
7105
7106 SDValue Mask = Load->getMask();
7107
7108 // FIXME: Handled masked loads somehow.
7110 return SDValue();
7111
7112 SDLoc DL(Op);
7113 MVT VT = Op.getSimpleValueType();
7114 unsigned EltSizeBits = VT.getScalarSizeInBits();
7115 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7116 "Unexpected unaligned RVV load type");
7117 MVT NewVT =
7118 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7119 assert(NewVT.isValid() &&
7120 "Expecting equally-sized RVV vector types to be legal");
7121
7122 SDValue VL = Load->getVectorLength();
7123 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
7124 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
7125
7126 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7127 SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
7128 DAG.getAllOnesConstant(DL, MaskVT), VL,
7129 Load->getPointerInfo(), Load->getBaseAlign(),
7130 Load->getMemOperand()->getFlags(), AAMDNodes());
7131 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
7132}
7133
7134// While RVV has alignment restrictions, we should always be able to store as a
7135// legal equivalently-sized byte-typed vector instead. This method is
7136// responsible for re-expressing a ISD::VP STORE via a correctly-aligned type.
7137// It returns SDValue() if the store is already correctly aligned.
7138SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
7139 SelectionDAG &DAG) const {
7140 auto *Store = cast<VPStoreSDNode>(Op);
7141 assert(Store && Store->getValue().getValueType().isVector() &&
7142 "Expected vector store");
7143
7145 Store->getMemoryVT(),
7146 *Store->getMemOperand()))
7147 return SDValue();
7148
7149 SDValue Mask = Store->getMask();
7150
7151 // FIXME: Handled masked stores somehow.
7153 return SDValue();
7154
7155 SDLoc DL(Op);
7156 SDValue StoredVal = Store->getValue();
7157 MVT VT = StoredVal.getSimpleValueType();
7158 unsigned EltSizeBits = VT.getScalarSizeInBits();
7159 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7160 "Unexpected unaligned RVV store type");
7161 MVT NewVT =
7162 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7163 assert(NewVT.isValid() &&
7164 "Expecting equally-sized RVV vector types to be legal");
7165
7166 SDValue VL = Store->getVectorLength();
7167 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
7168 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
7169
7170 StoredVal = DAG.getBitcast(NewVT, StoredVal);
7171
7172 LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
7173 MachineFunction &MF = DAG.getMachineFunction();
7174 MachineMemOperand *MMO = MF.getMachineMemOperand(
7175 Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
7176 Store->getBaseAlign());
7177
7178 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7179 return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7180 DAG.getUNDEF(Store->getBasePtr().getValueType()),
7181 DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
7183}
7184
7186 const RISCVSubtarget &Subtarget) {
7187 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
7188
7189 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
7190
7191 // All simm32 constants should be handled by isel.
7192 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
7193 // this check redundant, but small immediates are common so this check
7194 // should have better compile time.
7195 if (isInt<32>(Imm))
7196 return Op;
7197
7198 // We only need to cost the immediate, if constant pool lowering is enabled.
7199 if (!Subtarget.useConstantPoolForLargeInts())
7200 return Op;
7201
7203 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
7204 return Op;
7205
7206 // Optimizations below are disabled for opt size. If we're optimizing for
7207 // size, use a constant pool.
7208 if (DAG.shouldOptForSize())
7209 return SDValue();
7210
7211 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
7212 // that if it will avoid a constant pool.
7213 // It will require an extra temporary register though.
7214 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
7215 // low and high 32 bits are the same and bit 31 and 63 are set.
7216 unsigned ShiftAmt, AddOpc;
7217 RISCVMatInt::InstSeq SeqLo =
7218 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
7219 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
7220 return Op;
7221
7222 return SDValue();
7223}
7224
7225SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
7226 SelectionDAG &DAG) const {
7227 MVT VT = Op.getSimpleValueType();
7228 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
7229
7230 // Can this constant be selected by a Zfa FLI instruction?
7231 bool Negate = false;
7232 int Index = getLegalZfaFPImm(Imm, VT);
7233
7234 // If the constant is negative, try negating.
7235 if (Index < 0 && Imm.isNegative()) {
7236 Index = getLegalZfaFPImm(-Imm, VT);
7237 Negate = true;
7238 }
7239
7240 // If we couldn't find a FLI lowering, fall back to generic code.
7241 if (Index < 0)
7242 return SDValue();
7243
7244 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
7245 SDLoc DL(Op);
7246 SDValue Const =
7247 DAG.getNode(RISCVISD::FLI, DL, VT,
7248 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
7249 if (!Negate)
7250 return Const;
7251
7252 return DAG.getNode(ISD::FNEG, DL, VT, Const);
7253}
7254
7256 SelectionDAG &DAG) {
7257
7258 unsigned IsData = Op.getConstantOperandVal(4);
7259
7260 // mips-p8700 we support data prefetch for now.
7261 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
7262 return Op.getOperand(0);
7263 return Op;
7264}
7265
7267 const RISCVSubtarget &Subtarget) {
7268 SDLoc dl(Op);
7269 AtomicOrdering FenceOrdering =
7270 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
7271 SyncScope::ID FenceSSID =
7272 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
7273
7274 if (Subtarget.hasStdExtZtso()) {
7275 // The only fence that needs an instruction is a sequentially-consistent
7276 // cross-thread fence.
7277 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
7278 FenceSSID == SyncScope::System)
7279 return Op;
7280
7281 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7282 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7283 }
7284
7285 // singlethread fences only synchronize with signal handlers on the same
7286 // thread and thus only need to preserve instruction order, not actually
7287 // enforce memory ordering.
7288 if (FenceSSID == SyncScope::SingleThread)
7289 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7290 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7291
7292 return Op;
7293}
7294
7295SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
7296 SelectionDAG &DAG) const {
7297 SDLoc DL(Op);
7298 MVT VT = Op.getSimpleValueType();
7299 MVT XLenVT = Subtarget.getXLenVT();
7300 unsigned Check = Op.getConstantOperandVal(1);
7301 unsigned TDCMask = 0;
7302 if (Check & fcSNan)
7303 TDCMask |= RISCV::FPMASK_Signaling_NaN;
7304 if (Check & fcQNan)
7305 TDCMask |= RISCV::FPMASK_Quiet_NaN;
7306 if (Check & fcPosInf)
7308 if (Check & fcNegInf)
7310 if (Check & fcPosNormal)
7312 if (Check & fcNegNormal)
7314 if (Check & fcPosSubnormal)
7316 if (Check & fcNegSubnormal)
7318 if (Check & fcPosZero)
7319 TDCMask |= RISCV::FPMASK_Positive_Zero;
7320 if (Check & fcNegZero)
7321 TDCMask |= RISCV::FPMASK_Negative_Zero;
7322
7323 bool IsOneBitMask = isPowerOf2_32(TDCMask);
7324
7325 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
7326
7327 if (VT.isVector()) {
7328 SDValue Op0 = Op.getOperand(0);
7329 MVT VT0 = Op.getOperand(0).getSimpleValueType();
7330
7331 if (VT.isScalableVector()) {
7332 MVT DstVT = VT0.changeVectorElementTypeToInteger();
7333 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
7334 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7335 Mask = Op.getOperand(2);
7336 VL = Op.getOperand(3);
7337 }
7338 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
7339 VL, Op->getFlags());
7340 if (IsOneBitMask)
7341 return DAG.getSetCC(DL, VT, FPCLASS,
7342 DAG.getConstant(TDCMask, DL, DstVT),
7344 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
7345 DAG.getConstant(TDCMask, DL, DstVT));
7346 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
7347 ISD::SETNE);
7348 }
7349
7350 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
7351 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7352 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
7353 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
7354 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7355 Mask = Op.getOperand(2);
7356 MVT MaskContainerVT =
7357 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7358 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7359 VL = Op.getOperand(3);
7360 }
7361 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
7362
7363 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
7364 Mask, VL, Op->getFlags());
7365
7366 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7367 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
7368 if (IsOneBitMask) {
7369 SDValue VMSEQ =
7370 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7371 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
7372 DAG.getUNDEF(ContainerVT), Mask, VL});
7373 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
7374 }
7375 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
7376 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
7377
7378 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7379 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7380 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
7381
7382 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7383 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
7384 DAG.getUNDEF(ContainerVT), Mask, VL});
7385 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
7386 }
7387
7388 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
7389 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
7390 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
7392 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7393}
7394
7395// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
7396// operations propagate nans.
7398 const RISCVSubtarget &Subtarget) {
7399 SDLoc DL(Op);
7400 MVT VT = Op.getSimpleValueType();
7401
7402 SDValue X = Op.getOperand(0);
7403 SDValue Y = Op.getOperand(1);
7404
7405 if (!VT.isVector()) {
7406 MVT XLenVT = Subtarget.getXLenVT();
7407
7408 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
7409 // ensures that when one input is a nan, the other will also be a nan
7410 // allowing the nan to propagate. If both inputs are nan, this will swap the
7411 // inputs which is harmless.
7412
7413 SDValue NewY = Y;
7414 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
7415 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
7416 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
7417 }
7418
7419 SDValue NewX = X;
7420 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
7421 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
7422 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
7423 }
7424
7425 unsigned Opc =
7426 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
7427 return DAG.getNode(Opc, DL, VT, NewX, NewY);
7428 }
7429
7430 // Check no NaNs before converting to fixed vector scalable.
7431 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
7432 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
7433
7434 MVT ContainerVT = VT;
7435 if (VT.isFixedLengthVector()) {
7436 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
7437 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
7438 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
7439 }
7440
7441 SDValue Mask, VL;
7442 if (Op->isVPOpcode()) {
7443 Mask = Op.getOperand(2);
7444 if (VT.isFixedLengthVector())
7445 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7446 Subtarget);
7447 VL = Op.getOperand(3);
7448 } else {
7449 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7450 }
7451
7452 SDValue NewY = Y;
7453 if (!XIsNeverNan) {
7454 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7455 {X, X, DAG.getCondCode(ISD::SETOEQ),
7456 DAG.getUNDEF(ContainerVT), Mask, VL});
7457 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
7458 DAG.getUNDEF(ContainerVT), VL);
7459 }
7460
7461 SDValue NewX = X;
7462 if (!YIsNeverNan) {
7463 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7464 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
7465 DAG.getUNDEF(ContainerVT), Mask, VL});
7466 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
7467 DAG.getUNDEF(ContainerVT), VL);
7468 }
7469
7470 unsigned Opc =
7471 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
7472 ? RISCVISD::VFMAX_VL
7473 : RISCVISD::VFMIN_VL;
7474 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
7475 DAG.getUNDEF(ContainerVT), Mask, VL);
7476 if (VT.isFixedLengthVector())
7477 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
7478 return Res;
7479}
7480
7482 const RISCVSubtarget &Subtarget) {
7483 bool IsFABS = Op.getOpcode() == ISD::FABS;
7484 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7485 "Wrong opcode for lowering FABS or FNEG.");
7486
7487 MVT XLenVT = Subtarget.getXLenVT();
7488 MVT VT = Op.getSimpleValueType();
7489 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7490
7491 SDLoc DL(Op);
7492 SDValue Fmv =
7493 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7494
7495 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7496 Mask = Mask.sext(Subtarget.getXLen());
7497
7498 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7499 SDValue Logic =
7500 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7501 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7502}
7503
7505 const RISCVSubtarget &Subtarget) {
7506 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7507
7508 MVT XLenVT = Subtarget.getXLenVT();
7509 MVT VT = Op.getSimpleValueType();
7510 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7511
7512 SDValue Mag = Op.getOperand(0);
7513 SDValue Sign = Op.getOperand(1);
7514
7515 SDLoc DL(Op);
7516
7517 // Get sign bit into an integer value.
7518 unsigned SignSize = Sign.getValueSizeInBits();
7519 SDValue SignAsInt = [&]() {
7520 if (SignSize == Subtarget.getXLen())
7521 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7522 switch (SignSize) {
7523 case 16:
7524 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7525 case 32:
7526 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7527 case 64: {
7528 assert(XLenVT == MVT::i32 && "Unexpected type");
7529 // Copy the upper word to integer.
7530 SignSize = 32;
7531 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7532 .getValue(1);
7533 }
7534 default:
7535 llvm_unreachable("Unexpected sign size");
7536 }
7537 }();
7538
7539 // Get the signbit at the right position for MagAsInt.
7540 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7541 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7542 SignAsInt,
7543 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7544
7545 // Mask the sign bit and any bits above it. The extra bits will be dropped
7546 // when we convert back to FP.
7547 SDValue SignMask = DAG.getConstant(
7548 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7549 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7550
7551 // Transform Mag value to integer, and clear the sign bit.
7552 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7553 SDValue ClearSignMask = DAG.getConstant(
7554 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7555 SDValue ClearedSign =
7556 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7557
7558 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7560
7561 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7562}
7563
7564/// Get a RISC-V target specified VL op for a given SDNode.
7565static unsigned getRISCVVLOp(SDValue Op) {
7566#define OP_CASE(NODE) \
7567 case ISD::NODE: \
7568 return RISCVISD::NODE##_VL;
7569#define VP_CASE(NODE) \
7570 case ISD::VP_##NODE: \
7571 return RISCVISD::NODE##_VL;
7572 // clang-format off
7573 switch (Op.getOpcode()) {
7574 default:
7575 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7576 OP_CASE(ADD)
7577 OP_CASE(SUB)
7578 OP_CASE(MUL)
7579 OP_CASE(MULHS)
7580 OP_CASE(MULHU)
7581 OP_CASE(SDIV)
7582 OP_CASE(SREM)
7583 OP_CASE(UDIV)
7584 OP_CASE(UREM)
7585 OP_CASE(SHL)
7586 OP_CASE(SRA)
7587 OP_CASE(SRL)
7588 OP_CASE(ROTL)
7589 OP_CASE(ROTR)
7590 OP_CASE(BSWAP)
7591 OP_CASE(CTTZ)
7592 OP_CASE(CTLZ)
7593 OP_CASE(CTPOP)
7594 OP_CASE(BITREVERSE)
7595 OP_CASE(SADDSAT)
7596 OP_CASE(UADDSAT)
7597 OP_CASE(SSUBSAT)
7598 OP_CASE(USUBSAT)
7599 OP_CASE(AVGFLOORS)
7600 OP_CASE(AVGFLOORU)
7601 OP_CASE(AVGCEILS)
7602 OP_CASE(AVGCEILU)
7603 OP_CASE(FADD)
7604 OP_CASE(FSUB)
7605 OP_CASE(FMUL)
7606 OP_CASE(FDIV)
7607 OP_CASE(FNEG)
7608 OP_CASE(FABS)
7609 OP_CASE(FCOPYSIGN)
7610 OP_CASE(FSQRT)
7611 OP_CASE(SMIN)
7612 OP_CASE(SMAX)
7613 OP_CASE(UMIN)
7614 OP_CASE(UMAX)
7615 OP_CASE(ABDS)
7616 OP_CASE(ABDU)
7617 OP_CASE(STRICT_FADD)
7618 OP_CASE(STRICT_FSUB)
7619 OP_CASE(STRICT_FMUL)
7620 OP_CASE(STRICT_FDIV)
7621 OP_CASE(STRICT_FSQRT)
7622 VP_CASE(ADD) // VP_ADD
7623 VP_CASE(SUB) // VP_SUB
7624 VP_CASE(MUL) // VP_MUL
7625 VP_CASE(SDIV) // VP_SDIV
7626 VP_CASE(SREM) // VP_SREM
7627 VP_CASE(UDIV) // VP_UDIV
7628 VP_CASE(UREM) // VP_UREM
7629 VP_CASE(SHL) // VP_SHL
7630 VP_CASE(FADD) // VP_FADD
7631 VP_CASE(FSUB) // VP_FSUB
7632 VP_CASE(FMUL) // VP_FMUL
7633 VP_CASE(FDIV) // VP_FDIV
7634 VP_CASE(FNEG) // VP_FNEG
7635 VP_CASE(FABS) // VP_FABS
7636 VP_CASE(SMIN) // VP_SMIN
7637 VP_CASE(SMAX) // VP_SMAX
7638 VP_CASE(UMIN) // VP_UMIN
7639 VP_CASE(UMAX) // VP_UMAX
7640 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7641 VP_CASE(SETCC) // VP_SETCC
7642 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7643 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7644 VP_CASE(BITREVERSE) // VP_BITREVERSE
7645 VP_CASE(SADDSAT) // VP_SADDSAT
7646 VP_CASE(UADDSAT) // VP_UADDSAT
7647 VP_CASE(SSUBSAT) // VP_SSUBSAT
7648 VP_CASE(USUBSAT) // VP_USUBSAT
7649 VP_CASE(BSWAP) // VP_BSWAP
7650 VP_CASE(CTLZ) // VP_CTLZ
7651 VP_CASE(CTTZ) // VP_CTTZ
7652 VP_CASE(CTPOP) // VP_CTPOP
7654 case ISD::VP_CTLZ_ZERO_UNDEF:
7655 return RISCVISD::CTLZ_VL;
7657 case ISD::VP_CTTZ_ZERO_UNDEF:
7658 return RISCVISD::CTTZ_VL;
7659 case ISD::FMA:
7660 case ISD::VP_FMA:
7661 return RISCVISD::VFMADD_VL;
7662 case ISD::STRICT_FMA:
7663 return RISCVISD::STRICT_VFMADD_VL;
7664 case ISD::AND:
7665 case ISD::VP_AND:
7666 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7667 return RISCVISD::VMAND_VL;
7668 return RISCVISD::AND_VL;
7669 case ISD::OR:
7670 case ISD::VP_OR:
7671 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7672 return RISCVISD::VMOR_VL;
7673 return RISCVISD::OR_VL;
7674 case ISD::XOR:
7675 case ISD::VP_XOR:
7676 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7677 return RISCVISD::VMXOR_VL;
7678 return RISCVISD::XOR_VL;
7679 case ISD::ANY_EXTEND:
7680 case ISD::ZERO_EXTEND:
7681 return RISCVISD::VZEXT_VL;
7682 case ISD::SIGN_EXTEND:
7683 return RISCVISD::VSEXT_VL;
7684 case ISD::SETCC:
7685 return RISCVISD::SETCC_VL;
7686 case ISD::VSELECT:
7687 return RISCVISD::VMERGE_VL;
7688 case ISD::VP_SELECT:
7689 case ISD::VP_MERGE:
7690 return RISCVISD::VMERGE_VL;
7691 case ISD::VP_SRA:
7692 return RISCVISD::SRA_VL;
7693 case ISD::VP_SRL:
7694 return RISCVISD::SRL_VL;
7695 case ISD::VP_SQRT:
7696 return RISCVISD::FSQRT_VL;
7697 case ISD::VP_SIGN_EXTEND:
7698 return RISCVISD::VSEXT_VL;
7699 case ISD::VP_ZERO_EXTEND:
7700 return RISCVISD::VZEXT_VL;
7701 case ISD::VP_FP_TO_SINT:
7702 return RISCVISD::VFCVT_RTZ_X_F_VL;
7703 case ISD::VP_FP_TO_UINT:
7704 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7705 case ISD::FMINNUM:
7706 case ISD::FMINIMUMNUM:
7707 case ISD::VP_FMINNUM:
7708 return RISCVISD::VFMIN_VL;
7709 case ISD::FMAXNUM:
7710 case ISD::FMAXIMUMNUM:
7711 case ISD::VP_FMAXNUM:
7712 return RISCVISD::VFMAX_VL;
7713 case ISD::LRINT:
7714 case ISD::VP_LRINT:
7715 case ISD::LLRINT:
7716 case ISD::VP_LLRINT:
7717 return RISCVISD::VFCVT_RM_X_F_VL;
7718 }
7719 // clang-format on
7720#undef OP_CASE
7721#undef VP_CASE
7722}
7723
7725 const RISCVSubtarget &Subtarget) {
7726 return (Op.getValueType() == MVT::nxv32f16 &&
7727 (Subtarget.hasVInstructionsF16Minimal() &&
7728 !Subtarget.hasVInstructionsF16())) ||
7729 (Op.getValueType() == MVT::nxv32bf16 &&
7730 Subtarget.hasVInstructionsBF16Minimal() &&
7731 (!Subtarget.hasVInstructionsBF16() ||
7732 (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
7733 !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
7734}
7735
7737 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7738 SDLoc DL(Op);
7739
7740 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7741 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7742
7743 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7744 if (!Op.getOperand(j).getValueType().isVector()) {
7745 LoOperands[j] = Op.getOperand(j);
7746 HiOperands[j] = Op.getOperand(j);
7747 continue;
7748 }
7749 std::tie(LoOperands[j], HiOperands[j]) =
7750 DAG.SplitVector(Op.getOperand(j), DL);
7751 }
7752
7753 SDValue LoRes =
7754 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7755 SDValue HiRes =
7756 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7757
7758 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7759}
7760
7762 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7763 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7764 SDLoc DL(Op);
7765
7766 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7767 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7768
7769 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7770 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7771 std::tie(LoOperands[j], HiOperands[j]) =
7772 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7773 continue;
7774 }
7775 if (!Op.getOperand(j).getValueType().isVector()) {
7776 LoOperands[j] = Op.getOperand(j);
7777 HiOperands[j] = Op.getOperand(j);
7778 continue;
7779 }
7780 std::tie(LoOperands[j], HiOperands[j]) =
7781 DAG.SplitVector(Op.getOperand(j), DL);
7782 }
7783
7784 SDValue LoRes =
7785 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7786 SDValue HiRes =
7787 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7788
7789 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7790}
7791
7793 SDLoc DL(Op);
7794
7795 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7796 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7797 auto [EVLLo, EVLHi] =
7798 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7799
7800 SDValue ResLo =
7801 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7802 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7803 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7804 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7805}
7806
7808
7809 assert(Op->isStrictFPOpcode());
7810
7811 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7812
7813 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7814 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7815
7816 SDLoc DL(Op);
7817
7818 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7819 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7820
7821 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7822 if (!Op.getOperand(j).getValueType().isVector()) {
7823 LoOperands[j] = Op.getOperand(j);
7824 HiOperands[j] = Op.getOperand(j);
7825 continue;
7826 }
7827 std::tie(LoOperands[j], HiOperands[j]) =
7828 DAG.SplitVector(Op.getOperand(j), DL);
7829 }
7830
7831 SDValue LoRes =
7832 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7833 HiOperands[0] = LoRes.getValue(1);
7834 SDValue HiRes =
7835 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7836
7837 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7838 LoRes.getValue(0), HiRes.getValue(0));
7839 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7840}
7841
7842SDValue
7843RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7844 SelectionDAG &DAG) const {
7845 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7846 "Unexpected bfloat16 load lowering");
7847
7848 SDLoc DL(Op);
7849 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7850 EVT MemVT = LD->getMemoryVT();
7851 SDValue Load = DAG.getExtLoad(
7852 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7853 LD->getBasePtr(),
7855 LD->getMemOperand());
7856 // Using mask to make bf16 nan-boxing valid when we don't have flh
7857 // instruction. -65536 would be treat as a small number and thus it can be
7858 // directly used lui to get the constant.
7859 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7860 SDValue OrSixteenOne =
7861 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7862 SDValue ConvertedResult =
7863 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7864 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7865}
7866
7867SDValue
7868RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7869 SelectionDAG &DAG) const {
7870 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7871 "Unexpected bfloat16 store lowering");
7872
7873 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7874 SDLoc DL(Op);
7875 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7876 Subtarget.getXLenVT(), ST->getValue());
7877 return DAG.getTruncStore(
7878 ST->getChain(), DL, FMV, ST->getBasePtr(),
7879 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7880 ST->getMemOperand());
7881}
7882
7884 SelectionDAG &DAG) const {
7885 switch (Op.getOpcode()) {
7886 default:
7888 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7889 case ISD::PREFETCH:
7890 return LowerPREFETCH(Op, Subtarget, DAG);
7891 case ISD::ATOMIC_FENCE:
7892 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7893 case ISD::GlobalAddress:
7894 return lowerGlobalAddress(Op, DAG);
7895 case ISD::BlockAddress:
7896 return lowerBlockAddress(Op, DAG);
7897 case ISD::ConstantPool:
7898 return lowerConstantPool(Op, DAG);
7899 case ISD::JumpTable:
7900 return lowerJumpTable(Op, DAG);
7902 return lowerGlobalTLSAddress(Op, DAG);
7903 case ISD::Constant:
7904 return lowerConstant(Op, DAG, Subtarget);
7905 case ISD::ConstantFP:
7906 return lowerConstantFP(Op, DAG);
7907 case ISD::SELECT:
7908 return lowerSELECT(Op, DAG);
7909 case ISD::BRCOND:
7910 return lowerBRCOND(Op, DAG);
7911 case ISD::VASTART:
7912 return lowerVASTART(Op, DAG);
7913 case ISD::FRAMEADDR:
7914 return lowerFRAMEADDR(Op, DAG);
7915 case ISD::RETURNADDR:
7916 return lowerRETURNADDR(Op, DAG);
7917 case ISD::SHL_PARTS:
7918 return lowerShiftLeftParts(Op, DAG);
7919 case ISD::SRA_PARTS:
7920 return lowerShiftRightParts(Op, DAG, true);
7921 case ISD::SRL_PARTS:
7922 return lowerShiftRightParts(Op, DAG, false);
7923 case ISD::ROTL:
7924 case ISD::ROTR:
7925 if (Op.getValueType().isFixedLengthVector()) {
7926 assert(Subtarget.hasStdExtZvkb());
7927 return lowerToScalableOp(Op, DAG);
7928 }
7929 assert(Subtarget.hasVendorXTHeadBb() &&
7930 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7931 "Unexpected custom legalization");
7932 // XTHeadBb only supports rotate by constant.
7933 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7934 return SDValue();
7935 return Op;
7936 case ISD::BITCAST: {
7937 SDLoc DL(Op);
7938 EVT VT = Op.getValueType();
7939 SDValue Op0 = Op.getOperand(0);
7940 EVT Op0VT = Op0.getValueType();
7941 MVT XLenVT = Subtarget.getXLenVT();
7942 if (Op0VT == MVT::i16 &&
7943 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7944 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7945 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7946 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7947 }
7948 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7949 Subtarget.hasStdExtFOrZfinx()) {
7950 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7951 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7952 }
7953 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7954 Subtarget.hasStdExtDOrZdinx()) {
7955 SDValue Lo, Hi;
7956 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7957 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7958 }
7959
7960 if (Subtarget.hasStdExtP()) {
7961 bool Is32BitCast =
7962 (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
7963 (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
7964 bool Is64BitCast =
7965 (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
7966 Op0VT == MVT::v2i32)) ||
7967 (Op0VT == MVT::i64 &&
7968 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
7969 if (Is32BitCast || Is64BitCast)
7970 return Op;
7971 }
7972
7973 // Consider other scalar<->scalar casts as legal if the types are legal.
7974 // Otherwise expand them.
7975 if (!VT.isVector() && !Op0VT.isVector()) {
7976 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7977 return Op;
7978 return SDValue();
7979 }
7980
7981 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7982 "Unexpected types");
7983
7984 if (VT.isFixedLengthVector()) {
7985 // We can handle fixed length vector bitcasts with a simple replacement
7986 // in isel.
7987 if (Op0VT.isFixedLengthVector())
7988 return Op;
7989 // When bitcasting from scalar to fixed-length vector, insert the scalar
7990 // into a one-element vector of the result type, and perform a vector
7991 // bitcast.
7992 if (!Op0VT.isVector()) {
7993 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7994 if (!isTypeLegal(BVT))
7995 return SDValue();
7996 return DAG.getBitcast(
7997 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7998 }
7999 return SDValue();
8000 }
8001 // Custom-legalize bitcasts from fixed-length vector types to scalar types
8002 // thus: bitcast the vector to a one-element vector type whose element type
8003 // is the same as the result type, and extract the first element.
8004 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
8005 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
8006 if (!isTypeLegal(BVT))
8007 return SDValue();
8008 SDValue BVec = DAG.getBitcast(BVT, Op0);
8009 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
8010 }
8011 return SDValue();
8012 }
8014 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8016 return LowerINTRINSIC_W_CHAIN(Op, DAG);
8018 return LowerINTRINSIC_VOID(Op, DAG);
8019 case ISD::IS_FPCLASS:
8020 return LowerIS_FPCLASS(Op, DAG);
8021 case ISD::BITREVERSE: {
8022 MVT VT = Op.getSimpleValueType();
8023 if (VT.isFixedLengthVector()) {
8024 assert(Subtarget.hasStdExtZvbb());
8025 return lowerToScalableOp(Op, DAG);
8026 }
8027 SDLoc DL(Op);
8028 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
8029 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
8030 // Expand bitreverse to a bswap(rev8) followed by brev8.
8031 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
8032 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
8033 }
8034 case ISD::TRUNCATE:
8037 // Only custom-lower vector truncates
8038 if (!Op.getSimpleValueType().isVector())
8039 return Op;
8040 return lowerVectorTruncLike(Op, DAG);
8041 case ISD::ANY_EXTEND:
8042 case ISD::ZERO_EXTEND:
8043 if (Op.getOperand(0).getValueType().isVector() &&
8044 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8045 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
8046 if (Op.getValueType().isScalableVector())
8047 return Op;
8048 return lowerToScalableOp(Op, DAG);
8049 case ISD::SIGN_EXTEND:
8050 if (Op.getOperand(0).getValueType().isVector() &&
8051 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8052 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
8053 if (Op.getValueType().isScalableVector())
8054 return Op;
8055 return lowerToScalableOp(Op, DAG);
8057 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
8059 return lowerINSERT_VECTOR_ELT(Op, DAG);
8061 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
8062 case ISD::SCALAR_TO_VECTOR: {
8063 MVT VT = Op.getSimpleValueType();
8064 SDLoc DL(Op);
8065 SDValue Scalar = Op.getOperand(0);
8066 if (VT.getVectorElementType() == MVT::i1) {
8067 MVT WideVT = VT.changeVectorElementType(MVT::i8);
8068 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
8069 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
8070 }
8071 MVT ContainerVT = VT;
8072 if (VT.isFixedLengthVector())
8073 ContainerVT = getContainerForFixedLengthVector(VT);
8074 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8075
8076 SDValue V;
8077 if (VT.isFloatingPoint()) {
8078 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
8079 DAG.getUNDEF(ContainerVT), Scalar, VL);
8080 } else {
8081 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
8082 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
8083 DAG.getUNDEF(ContainerVT), Scalar, VL);
8084 }
8085 if (VT.isFixedLengthVector())
8086 V = convertFromScalableVector(VT, V, DAG, Subtarget);
8087 return V;
8088 }
8089 case ISD::VSCALE: {
8090 MVT XLenVT = Subtarget.getXLenVT();
8091 MVT VT = Op.getSimpleValueType();
8092 SDLoc DL(Op);
8093 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8094 // We define our scalable vector types for lmul=1 to use a 64 bit known
8095 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
8096 // vscale as VLENB / 8.
8097 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
8098 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
8099 reportFatalInternalError("Support for VLEN==32 is incomplete.");
8100 // We assume VLENB is a multiple of 8. We manually choose the best shift
8101 // here because SimplifyDemandedBits isn't always able to simplify it.
8102 uint64_t Val = Op.getConstantOperandVal(0);
8103 if (isPowerOf2_64(Val)) {
8104 uint64_t Log2 = Log2_64(Val);
8105 if (Log2 < 3) {
8106 SDNodeFlags Flags;
8107 Flags.setExact(true);
8108 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
8109 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
8110 } else if (Log2 > 3) {
8111 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
8112 DAG.getConstant(Log2 - 3, DL, XLenVT));
8113 }
8114 } else if ((Val % 8) == 0) {
8115 // If the multiplier is a multiple of 8, scale it down to avoid needing
8116 // to shift the VLENB value.
8117 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
8118 DAG.getConstant(Val / 8, DL, XLenVT));
8119 } else {
8120 SDNodeFlags Flags;
8121 Flags.setExact(true);
8122 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
8123 DAG.getConstant(3, DL, XLenVT), Flags);
8124 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
8125 DAG.getConstant(Val, DL, XLenVT));
8126 }
8127 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
8128 }
8129 case ISD::FPOWI: {
8130 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
8131 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
8132 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
8133 Op.getOperand(1).getValueType() == MVT::i32) {
8134 SDLoc DL(Op);
8135 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8136 SDValue Powi =
8137 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
8138 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
8139 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8140 }
8141 return SDValue();
8142 }
8143 case ISD::FMAXIMUM:
8144 case ISD::FMINIMUM:
8145 if (isPromotedOpNeedingSplit(Op, Subtarget))
8146 return SplitVectorOp(Op, DAG);
8147 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8148 case ISD::FP_EXTEND:
8149 case ISD::FP_ROUND:
8150 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8153 return lowerStrictFPExtendOrRoundLike(Op, DAG);
8154 case ISD::SINT_TO_FP:
8155 case ISD::UINT_TO_FP:
8156 if (Op.getValueType().isVector() &&
8157 ((Op.getValueType().getScalarType() == MVT::f16 &&
8158 (Subtarget.hasVInstructionsF16Minimal() &&
8159 !Subtarget.hasVInstructionsF16())) ||
8160 Op.getValueType().getScalarType() == MVT::bf16)) {
8161 if (isPromotedOpNeedingSplit(Op, Subtarget))
8162 return SplitVectorOp(Op, DAG);
8163 // int -> f32
8164 SDLoc DL(Op);
8165 MVT NVT =
8166 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8167 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8168 // f32 -> [b]f16
8169 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8170 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8171 }
8172 [[fallthrough]];
8173 case ISD::FP_TO_SINT:
8174 case ISD::FP_TO_UINT:
8175 if (SDValue Op1 = Op.getOperand(0);
8176 Op1.getValueType().isVector() &&
8177 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8178 (Subtarget.hasVInstructionsF16Minimal() &&
8179 !Subtarget.hasVInstructionsF16())) ||
8180 Op1.getValueType().getScalarType() == MVT::bf16)) {
8181 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8182 return SplitVectorOp(Op, DAG);
8183 // [b]f16 -> f32
8184 SDLoc DL(Op);
8185 MVT NVT = MVT::getVectorVT(MVT::f32,
8186 Op1.getValueType().getVectorElementCount());
8187 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8188 // f32 -> int
8189 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
8190 }
8191 [[fallthrough]];
8196 // RVV can only do fp<->int conversions to types half/double the size as
8197 // the source. We custom-lower any conversions that do two hops into
8198 // sequences.
8199 MVT VT = Op.getSimpleValueType();
8200 if (VT.isScalarInteger())
8201 return lowerFP_TO_INT(Op, DAG, Subtarget);
8202 bool IsStrict = Op->isStrictFPOpcode();
8203 SDValue Src = Op.getOperand(0 + IsStrict);
8204 MVT SrcVT = Src.getSimpleValueType();
8205 if (SrcVT.isScalarInteger())
8206 return lowerINT_TO_FP(Op, DAG, Subtarget);
8207 if (!VT.isVector())
8208 return Op;
8209 SDLoc DL(Op);
8210 MVT EltVT = VT.getVectorElementType();
8211 MVT SrcEltVT = SrcVT.getVectorElementType();
8212 unsigned EltSize = EltVT.getSizeInBits();
8213 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
8214 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
8215 "Unexpected vector element types");
8216
8217 bool IsInt2FP = SrcEltVT.isInteger();
8218 // Widening conversions
8219 if (EltSize > (2 * SrcEltSize)) {
8220 if (IsInt2FP) {
8221 // Do a regular integer sign/zero extension then convert to float.
8222 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
8224 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
8225 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
8228 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
8229 if (IsStrict)
8230 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
8231 Op.getOperand(0), Ext);
8232 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
8233 }
8234 // FP2Int
8235 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
8236 // Do one doubling fp_extend then complete the operation by converting
8237 // to int.
8238 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8239 if (IsStrict) {
8240 auto [FExt, Chain] =
8241 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
8242 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
8243 }
8244 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
8245 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
8246 }
8247
8248 // Narrowing conversions
8249 if (SrcEltSize > (2 * EltSize)) {
8250 if (IsInt2FP) {
8251 // One narrowing int_to_fp, then an fp_round.
8252 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
8253 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8254 if (IsStrict) {
8255 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
8256 DAG.getVTList(InterimFVT, MVT::Other),
8257 Op.getOperand(0), Src);
8258 SDValue Chain = Int2FP.getValue(1);
8259 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
8260 }
8261 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
8262 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
8263 }
8264 // FP2Int
8265 // One narrowing fp_to_int, then truncate the integer. If the float isn't
8266 // representable by the integer, the result is poison.
8267 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
8269 if (IsStrict) {
8270 SDValue FP2Int =
8271 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
8272 Op.getOperand(0), Src);
8273 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8274 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
8275 }
8276 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
8277 if (EltSize == 1)
8278 // The integer should be 0 or 1/-1, so compare the integer result to 0.
8279 return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, IVecVT), FP2Int,
8280 ISD::SETNE);
8281 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8282 }
8283
8284 // Scalable vectors can exit here. Patterns will handle equally-sized
8285 // conversions halving/doubling ones.
8286 if (!VT.isFixedLengthVector())
8287 return Op;
8288
8289 // For fixed-length vectors we lower to a custom "VL" node.
8290 unsigned RVVOpc = 0;
8291 switch (Op.getOpcode()) {
8292 default:
8293 llvm_unreachable("Impossible opcode");
8294 case ISD::FP_TO_SINT:
8295 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
8296 break;
8297 case ISD::FP_TO_UINT:
8298 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
8299 break;
8300 case ISD::SINT_TO_FP:
8301 RVVOpc = RISCVISD::SINT_TO_FP_VL;
8302 break;
8303 case ISD::UINT_TO_FP:
8304 RVVOpc = RISCVISD::UINT_TO_FP_VL;
8305 break;
8307 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
8308 break;
8310 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
8311 break;
8313 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
8314 break;
8316 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
8317 break;
8318 }
8319
8320 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8321 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8322 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
8323 "Expected same element count");
8324
8325 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8326
8327 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8328 if (IsStrict) {
8329 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8330 Op.getOperand(0), Src, Mask, VL);
8331 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
8332 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
8333 }
8334 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
8335 return convertFromScalableVector(VT, Src, DAG, Subtarget);
8336 }
8339 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
8340 case ISD::FP_TO_BF16: {
8341 // Custom lower to ensure the libcall return is passed in an FPR on hard
8342 // float ABIs.
8343 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
8344 SDLoc DL(Op);
8345 MakeLibCallOptions CallOptions;
8346 RTLIB::Libcall LC =
8347 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
8348 SDValue Res =
8349 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
8350 if (Subtarget.is64Bit())
8351 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8352 return DAG.getBitcast(MVT::i32, Res);
8353 }
8354 case ISD::BF16_TO_FP: {
8355 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
8356 MVT VT = Op.getSimpleValueType();
8357 SDLoc DL(Op);
8358 Op = DAG.getNode(
8359 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
8360 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
8361 SDValue Res = Subtarget.is64Bit()
8362 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
8363 : DAG.getBitcast(MVT::f32, Op);
8364 // fp_extend if the target VT is bigger than f32.
8365 if (VT != MVT::f32)
8366 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
8367 return Res;
8368 }
8370 case ISD::FP_TO_FP16: {
8371 // Custom lower to ensure the libcall return is passed in an FPR on hard
8372 // float ABIs.
8373 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8374 SDLoc DL(Op);
8375 MakeLibCallOptions CallOptions;
8376 bool IsStrict = Op->isStrictFPOpcode();
8377 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8378 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8379 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
8380 SDValue Res;
8381 std::tie(Res, Chain) =
8382 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
8383 if (Subtarget.is64Bit())
8384 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8385 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
8386 if (IsStrict)
8387 return DAG.getMergeValues({Result, Chain}, DL);
8388 return Result;
8389 }
8391 case ISD::FP16_TO_FP: {
8392 // Custom lower to ensure the libcall argument is passed in an FPR on hard
8393 // float ABIs.
8394 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8395 SDLoc DL(Op);
8396 MakeLibCallOptions CallOptions;
8397 bool IsStrict = Op->isStrictFPOpcode();
8398 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8399 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8400 SDValue Arg = Subtarget.is64Bit()
8401 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
8402 : DAG.getBitcast(MVT::f32, Op0);
8403 SDValue Res;
8404 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
8405 CallOptions, DL, Chain);
8406 if (IsStrict)
8407 return DAG.getMergeValues({Res, Chain}, DL);
8408 return Res;
8409 }
8410 case ISD::FTRUNC:
8411 case ISD::FCEIL:
8412 case ISD::FFLOOR:
8413 case ISD::FNEARBYINT:
8414 case ISD::FRINT:
8415 case ISD::FROUND:
8416 case ISD::FROUNDEVEN:
8417 if (isPromotedOpNeedingSplit(Op, Subtarget))
8418 return SplitVectorOp(Op, DAG);
8419 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8420 case ISD::LRINT:
8421 case ISD::LLRINT:
8422 case ISD::LROUND:
8423 case ISD::LLROUND: {
8424 if (Op.getValueType().isVector())
8425 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
8426 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
8427 "Unexpected custom legalisation");
8428 SDLoc DL(Op);
8429 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8430 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
8431 }
8432 case ISD::STRICT_LRINT:
8433 case ISD::STRICT_LLRINT:
8434 case ISD::STRICT_LROUND:
8435 case ISD::STRICT_LLROUND: {
8436 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
8437 "Unexpected custom legalisation");
8438 SDLoc DL(Op);
8439 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
8440 {Op.getOperand(0), Op.getOperand(1)});
8441 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
8442 {Ext.getValue(1), Ext.getValue(0)});
8443 }
8444 case ISD::VECREDUCE_ADD:
8449 return lowerVECREDUCE(Op, DAG);
8450 case ISD::VECREDUCE_AND:
8451 case ISD::VECREDUCE_OR:
8452 case ISD::VECREDUCE_XOR:
8453 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8454 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
8455 return lowerVECREDUCE(Op, DAG);
8462 return lowerFPVECREDUCE(Op, DAG);
8463 case ISD::VP_REDUCE_ADD:
8464 case ISD::VP_REDUCE_UMAX:
8465 case ISD::VP_REDUCE_SMAX:
8466 case ISD::VP_REDUCE_UMIN:
8467 case ISD::VP_REDUCE_SMIN:
8468 case ISD::VP_REDUCE_FADD:
8469 case ISD::VP_REDUCE_SEQ_FADD:
8470 case ISD::VP_REDUCE_FMIN:
8471 case ISD::VP_REDUCE_FMAX:
8472 case ISD::VP_REDUCE_FMINIMUM:
8473 case ISD::VP_REDUCE_FMAXIMUM:
8474 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
8475 return SplitVectorReductionOp(Op, DAG);
8476 return lowerVPREDUCE(Op, DAG);
8477 case ISD::VP_REDUCE_AND:
8478 case ISD::VP_REDUCE_OR:
8479 case ISD::VP_REDUCE_XOR:
8480 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
8481 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
8482 return lowerVPREDUCE(Op, DAG);
8483 case ISD::VP_CTTZ_ELTS:
8484 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
8485 return lowerVPCttzElements(Op, DAG);
8486 case ISD::UNDEF: {
8487 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
8488 return convertFromScalableVector(Op.getSimpleValueType(),
8489 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
8490 }
8492 return lowerINSERT_SUBVECTOR(Op, DAG);
8494 return lowerEXTRACT_SUBVECTOR(Op, DAG);
8496 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
8498 return lowerVECTOR_INTERLEAVE(Op, DAG);
8499 case ISD::STEP_VECTOR:
8500 return lowerSTEP_VECTOR(Op, DAG);
8502 return lowerVECTOR_REVERSE(Op, DAG);
8505 return lowerVECTOR_SPLICE(Op, DAG);
8506 case ISD::BUILD_VECTOR: {
8507 MVT VT = Op.getSimpleValueType();
8508 MVT EltVT = VT.getVectorElementType();
8509 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8510 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8511 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8512 }
8513 case ISD::SPLAT_VECTOR: {
8514 MVT VT = Op.getSimpleValueType();
8515 MVT EltVT = VT.getVectorElementType();
8516 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8517 EltVT == MVT::bf16) {
8518 SDLoc DL(Op);
8519 SDValue Elt;
8520 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8521 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8522 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8523 Op.getOperand(0));
8524 else
8525 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8526 MVT IVT = VT.changeVectorElementType(MVT::i16);
8527 return DAG.getNode(ISD::BITCAST, DL, VT,
8528 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8529 }
8530
8531 if (EltVT == MVT::i1)
8532 return lowerVectorMaskSplat(Op, DAG);
8533 return SDValue();
8534 }
8536 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8537 case ISD::CONCAT_VECTORS: {
8538 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8539 // better than going through the stack, as the default expansion does.
8540 SDLoc DL(Op);
8541 MVT VT = Op.getSimpleValueType();
8542 MVT ContainerVT = VT;
8543 if (VT.isFixedLengthVector())
8544 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8545
8546 // Recursively split concat_vectors with more than 2 operands:
8547 //
8548 // concat_vector op1, op2, op3, op4
8549 // ->
8550 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8551 //
8552 // This reduces the length of the chain of vslideups and allows us to
8553 // perform the vslideups at a smaller LMUL, limited to MF2.
8554 if (Op.getNumOperands() > 2 &&
8555 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8556 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8557 assert(isPowerOf2_32(Op.getNumOperands()));
8558 size_t HalfNumOps = Op.getNumOperands() / 2;
8559 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8560 Op->ops().take_front(HalfNumOps));
8561 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8562 Op->ops().drop_front(HalfNumOps));
8563 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8564 }
8565
8566 unsigned NumOpElts =
8567 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8568 SDValue Vec = DAG.getUNDEF(VT);
8569 for (const auto &OpIdx : enumerate(Op->ops())) {
8570 SDValue SubVec = OpIdx.value();
8571 // Don't insert undef subvectors.
8572 if (SubVec.isUndef())
8573 continue;
8574 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8575 }
8576 return Vec;
8577 }
8578 case ISD::LOAD: {
8579 auto *Load = cast<LoadSDNode>(Op);
8580 EVT VT = Load->getValueType(0);
8581 if (VT == MVT::f64) {
8582 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8583 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8584
8585 // Replace a double precision load with two i32 loads and a BuildPairF64.
8586 SDLoc DL(Op);
8587 SDValue BasePtr = Load->getBasePtr();
8588 SDValue Chain = Load->getChain();
8589
8590 SDValue Lo =
8591 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8592 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8593 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8594 SDValue Hi = DAG.getLoad(
8595 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8596 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8597 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8598 Hi.getValue(1));
8599
8600 // For big-endian, swap the order of Lo and Hi.
8601 if (!Subtarget.isLittleEndian())
8602 std::swap(Lo, Hi);
8603
8604 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8605 return DAG.getMergeValues({Pair, Chain}, DL);
8606 }
8607
8608 if (VT == MVT::bf16)
8609 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8610
8611 // Handle normal vector tuple load.
8612 if (VT.isRISCVVectorTuple()) {
8613 SDLoc DL(Op);
8614 MVT XLenVT = Subtarget.getXLenVT();
8615 unsigned NF = VT.getRISCVVectorTupleNumFields();
8616 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8617 unsigned NumElts = Sz / (NF * 8);
8618 int Log2LMUL = Log2_64(NumElts) - 3;
8619
8620 auto Flag = SDNodeFlags();
8621 Flag.setNoUnsignedWrap(true);
8622 SDValue Ret = DAG.getUNDEF(VT);
8623 SDValue BasePtr = Load->getBasePtr();
8624 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8625 VROffset =
8626 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8627 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8628 SmallVector<SDValue, 8> OutChains;
8629
8630 // Load NF vector registers and combine them to a vector tuple.
8631 for (unsigned i = 0; i < NF; ++i) {
8632 SDValue LoadVal = DAG.getLoad(
8633 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8634 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8635 OutChains.push_back(LoadVal.getValue(1));
8636 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8637 DAG.getTargetConstant(i, DL, MVT::i32));
8638 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8639 }
8640 return DAG.getMergeValues(
8641 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8642 }
8643
8644 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8645 return V;
8646 if (Op.getValueType().isFixedLengthVector())
8647 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8648 return Op;
8649 }
8650 case ISD::STORE: {
8651 auto *Store = cast<StoreSDNode>(Op);
8652 SDValue StoredVal = Store->getValue();
8653 EVT VT = StoredVal.getValueType();
8654 if (Subtarget.hasStdExtP()) {
8655 if (VT == MVT::v2i16 || VT == MVT::v4i8) {
8656 SDValue DL(Op);
8657 SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
8658 SDValue NewStore =
8659 DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
8660 Store->getPointerInfo(), Store->getBaseAlign(),
8661 Store->getMemOperand()->getFlags());
8662 return NewStore;
8663 }
8664 }
8665 if (VT == MVT::f64) {
8666 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8667 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8668
8669 // Replace a double precision store with a SplitF64 and i32 stores.
8670 SDValue DL(Op);
8671 SDValue BasePtr = Store->getBasePtr();
8672 SDValue Chain = Store->getChain();
8673 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8674 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8675
8676 SDValue Lo = Split.getValue(0);
8677 SDValue Hi = Split.getValue(1);
8678
8679 // For big-endian, swap the order of Lo and Hi before storing.
8680 if (!Subtarget.isLittleEndian())
8681 std::swap(Lo, Hi);
8682
8683 SDValue LoStore = DAG.getStore(
8684 Chain, DL, Lo, BasePtr, Store->getPointerInfo(),
8685 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8686 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8687 SDValue HiStore = DAG.getStore(
8688 Chain, DL, Hi, BasePtr, Store->getPointerInfo().getWithOffset(4),
8689 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8690 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
8691 }
8692 if (VT == MVT::i64) {
8693 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8694 "Unexpected custom legalisation");
8695 if (Store->isTruncatingStore())
8696 return SDValue();
8697
8698 if (Store->getAlign() < Subtarget.getZilsdAlign())
8699 return SDValue();
8700
8701 SDLoc DL(Op);
8702 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8703 DAG.getTargetConstant(0, DL, MVT::i32));
8704 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8705 DAG.getTargetConstant(1, DL, MVT::i32));
8706
8707 return DAG.getMemIntrinsicNode(
8708 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8709 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8710 Store->getMemOperand());
8711 }
8712
8713 if (VT == MVT::bf16)
8714 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8715
8716 // Handle normal vector tuple store.
8717 if (VT.isRISCVVectorTuple()) {
8718 SDLoc DL(Op);
8719 MVT XLenVT = Subtarget.getXLenVT();
8720 unsigned NF = VT.getRISCVVectorTupleNumFields();
8721 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8722 unsigned NumElts = Sz / (NF * 8);
8723 int Log2LMUL = Log2_64(NumElts) - 3;
8724
8725 auto Flag = SDNodeFlags();
8726 Flag.setNoUnsignedWrap(true);
8727 SDValue Ret;
8728 SDValue Chain = Store->getChain();
8729 SDValue BasePtr = Store->getBasePtr();
8730 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8731 VROffset =
8732 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8733 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8734
8735 // Extract subregisters in a vector tuple and store them individually.
8736 for (unsigned i = 0; i < NF; ++i) {
8737 auto Extract =
8738 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8739 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8740 DAG.getTargetConstant(i, DL, MVT::i32));
8741 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8742 MachinePointerInfo(Store->getAddressSpace()),
8743 Store->getBaseAlign(),
8744 Store->getMemOperand()->getFlags());
8745 Chain = Ret.getValue(0);
8746 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8747 }
8748 return Ret;
8749 }
8750
8751 if (auto V = expandUnalignedRVVStore(Op, DAG))
8752 return V;
8753 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8754 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8755 return Op;
8756 }
8757 case ISD::VP_LOAD:
8758 if (SDValue V = expandUnalignedVPLoad(Op, DAG))
8759 return V;
8760 [[fallthrough]];
8761 case ISD::MLOAD:
8762 return lowerMaskedLoad(Op, DAG);
8763 case ISD::VP_LOAD_FF:
8764 return lowerLoadFF(Op, DAG);
8765 case ISD::VP_STORE:
8766 if (SDValue V = expandUnalignedVPStore(Op, DAG))
8767 return V;
8768 [[fallthrough]];
8769 case ISD::MSTORE:
8770 return lowerMaskedStore(Op, DAG);
8772 return lowerVectorCompress(Op, DAG);
8773 case ISD::SELECT_CC: {
8774 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8775 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8776 // into separate SETCC+SELECT just like LegalizeDAG.
8777 SDValue Tmp1 = Op.getOperand(0);
8778 SDValue Tmp2 = Op.getOperand(1);
8779 SDValue True = Op.getOperand(2);
8780 SDValue False = Op.getOperand(3);
8781 EVT VT = Op.getValueType();
8782 SDValue CC = Op.getOperand(4);
8783 EVT CmpVT = Tmp1.getValueType();
8784 EVT CCVT =
8785 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8786 SDLoc DL(Op);
8787 SDValue Cond =
8788 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8789 return DAG.getSelect(DL, VT, Cond, True, False);
8790 }
8791 case ISD::SETCC: {
8792 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8793 if (OpVT.isScalarInteger()) {
8794 MVT VT = Op.getSimpleValueType();
8795 SDValue LHS = Op.getOperand(0);
8796 SDValue RHS = Op.getOperand(1);
8797 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8798 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8799 "Unexpected CondCode");
8800
8801 SDLoc DL(Op);
8802
8803 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8804 // convert this to the equivalent of (set(u)ge X, C+1) by using
8805 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8806 // in a register.
8807 if (isa<ConstantSDNode>(RHS)) {
8808 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8809 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8810 // If this is an unsigned compare and the constant is -1, incrementing
8811 // the constant would change behavior. The result should be false.
8812 if (CCVal == ISD::SETUGT && Imm == -1)
8813 return DAG.getConstant(0, DL, VT);
8814 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8815 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8816 SDValue SetCC = DAG.getSetCC(
8817 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8818 return DAG.getLogicalNOT(DL, SetCC, VT);
8819 }
8820 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8821 if (CCVal == ISD::SETUGT && Imm == 2047) {
8822 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8823 DAG.getShiftAmountConstant(11, OpVT, DL));
8824 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8825 ISD::SETNE);
8826 }
8827 }
8828
8829 // Not a constant we could handle, swap the operands and condition code to
8830 // SETLT/SETULT.
8831 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8832 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8833 }
8834
8835 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8836 return SplitVectorOp(Op, DAG);
8837
8838 return lowerToScalableOp(Op, DAG);
8839 }
8840 case ISD::ADD:
8841 case ISD::SUB:
8842 case ISD::MUL:
8843 case ISD::MULHS:
8844 case ISD::MULHU:
8845 case ISD::AND:
8846 case ISD::OR:
8847 case ISD::XOR:
8848 case ISD::SDIV:
8849 case ISD::SREM:
8850 case ISD::UDIV:
8851 case ISD::UREM:
8852 case ISD::BSWAP:
8853 case ISD::CTPOP:
8854 case ISD::VSELECT:
8855 return lowerToScalableOp(Op, DAG);
8856 case ISD::SHL:
8857 case ISD::SRL:
8858 case ISD::SRA:
8859 if (Op.getSimpleValueType().isFixedLengthVector()) {
8860 if (Subtarget.hasStdExtP()) {
8861 SDValue ShAmtVec = Op.getOperand(1);
8862 SDValue SplatVal;
8863 if (ShAmtVec.getOpcode() == ISD::SPLAT_VECTOR)
8864 SplatVal = ShAmtVec.getOperand(0);
8865 else if (ShAmtVec.getOpcode() == ISD::BUILD_VECTOR)
8866 SplatVal = cast<BuildVectorSDNode>(ShAmtVec)->getSplatValue();
8867
8868 if (!SplatVal)
8869 return DAG.UnrollVectorOp(Op.getNode());
8870
8871 unsigned Opc;
8872 switch (Op.getOpcode()) {
8873 default:
8874 llvm_unreachable("Unexpected opcode");
8875 case ISD::SHL:
8876 Opc = RISCVISD::PSHL;
8877 break;
8878 case ISD::SRL:
8879 Opc = RISCVISD::PSRL;
8880 break;
8881 case ISD::SRA:
8882 Opc = RISCVISD::PSRA;
8883 break;
8884 }
8885 return DAG.getNode(Opc, SDLoc(Op), Op.getValueType(), Op.getOperand(0),
8886 SplatVal);
8887 }
8888 return lowerToScalableOp(Op, DAG);
8889 }
8890 // This can be called for an i32 shift amount that needs to be promoted.
8891 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8892 "Unexpected custom legalisation");
8893 return SDValue();
8894 case ISD::SSHLSAT: {
8895 MVT VT = Op.getSimpleValueType();
8896 assert(VT.isFixedLengthVector() && Subtarget.hasStdExtP() &&
8897 "Unexptect custom legalisation");
8898 APInt Splat;
8899 if (!ISD::isConstantSplatVector(Op.getOperand(1).getNode(), Splat))
8900 return SDValue();
8901 uint64_t ShAmt = Splat.getZExtValue();
8902 if (ShAmt >= VT.getVectorElementType().getSizeInBits())
8903 return SDValue();
8904 SDLoc DL(Op);
8905 return DAG.getNode(RISCVISD::PSSLAI, DL, VT, Op.getOperand(0),
8906 DAG.getTargetConstant(ShAmt, DL, Subtarget.getXLenVT()));
8907 }
8908 case ISD::FABS:
8909 case ISD::FNEG:
8910 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8911 return lowerFABSorFNEG(Op, DAG, Subtarget);
8912 [[fallthrough]];
8913 case ISD::FADD:
8914 case ISD::FSUB:
8915 case ISD::FMUL:
8916 case ISD::FDIV:
8917 case ISD::FSQRT:
8918 case ISD::FMA:
8919 case ISD::FMINNUM:
8920 case ISD::FMAXNUM:
8921 case ISD::FMINIMUMNUM:
8922 case ISD::FMAXIMUMNUM:
8923 if (isPromotedOpNeedingSplit(Op, Subtarget))
8924 return SplitVectorOp(Op, DAG);
8925 [[fallthrough]];
8926 case ISD::AVGFLOORS:
8927 case ISD::AVGFLOORU:
8928 case ISD::AVGCEILS:
8929 case ISD::AVGCEILU:
8930 case ISD::SMIN:
8931 case ISD::SMAX:
8932 case ISD::UMIN:
8933 case ISD::UMAX:
8934 case ISD::UADDSAT:
8935 case ISD::USUBSAT:
8936 case ISD::SADDSAT:
8937 case ISD::SSUBSAT:
8938 return lowerToScalableOp(Op, DAG);
8939 case ISD::ABDS:
8940 case ISD::ABDU: {
8941 EVT VT = Op->getValueType(0);
8942 // Only SEW=8/16 are supported in Zvabd.
8943 if (Subtarget.hasStdExtZvabd() && VT.isVector() &&
8944 (VT.getVectorElementType() == MVT::i8 ||
8945 VT.getVectorElementType() == MVT::i16))
8946 return lowerToScalableOp(Op, DAG);
8947
8948 SDLoc dl(Op);
8949 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8950 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8951 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8952
8953 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8954 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8955 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8956 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8957 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8958 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8959 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8960 }
8961 case ISD::ABS:
8962 case ISD::VP_ABS:
8963 return lowerABS(Op, DAG);
8964 case ISD::CTLZ:
8966 case ISD::CTTZ:
8968 if (Subtarget.hasStdExtZvbb())
8969 return lowerToScalableOp(Op, DAG);
8970 assert(Op.getOpcode() != ISD::CTTZ);
8971 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8972 case ISD::CLMUL: {
8973 MVT VT = Op.getSimpleValueType();
8974 assert(VT.isScalableVector() && Subtarget.hasStdExtZvbc() &&
8975 "Unexpected custom legalisation");
8976 // Promote to i64 vector.
8977 MVT I64VecVT = VT.changeVectorElementType(MVT::i64);
8978 SDLoc DL(Op);
8979 SDValue Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, I64VecVT, Op.getOperand(0));
8980 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, I64VecVT, Op.getOperand(1));
8981 SDValue CLMUL = DAG.getNode(ISD::CLMUL, DL, I64VecVT, Op0, Op1);
8982 return DAG.getNode(ISD::TRUNCATE, DL, VT, CLMUL);
8983 }
8984 case ISD::FCOPYSIGN:
8985 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8986 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8987 if (isPromotedOpNeedingSplit(Op, Subtarget))
8988 return SplitVectorOp(Op, DAG);
8989 return lowerToScalableOp(Op, DAG);
8990 case ISD::STRICT_FADD:
8991 case ISD::STRICT_FSUB:
8992 case ISD::STRICT_FMUL:
8993 case ISD::STRICT_FDIV:
8994 case ISD::STRICT_FSQRT:
8995 case ISD::STRICT_FMA:
8996 if (isPromotedOpNeedingSplit(Op, Subtarget))
8997 return SplitStrictFPVectorOp(Op, DAG);
8998 return lowerToScalableOp(Op, DAG);
8999 case ISD::STRICT_FSETCC:
9001 return lowerVectorStrictFSetcc(Op, DAG);
9002 case ISD::STRICT_FCEIL:
9003 case ISD::STRICT_FRINT:
9004 case ISD::STRICT_FFLOOR:
9005 case ISD::STRICT_FTRUNC:
9007 case ISD::STRICT_FROUND:
9009 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
9010 case ISD::MGATHER:
9011 case ISD::VP_GATHER:
9012 return lowerMaskedGather(Op, DAG);
9013 case ISD::MSCATTER:
9014 case ISD::VP_SCATTER:
9015 return lowerMaskedScatter(Op, DAG);
9016 case ISD::GET_ROUNDING:
9017 return lowerGET_ROUNDING(Op, DAG);
9018 case ISD::SET_ROUNDING:
9019 return lowerSET_ROUNDING(Op, DAG);
9020 case ISD::GET_FPENV:
9021 return lowerGET_FPENV(Op, DAG);
9022 case ISD::SET_FPENV:
9023 return lowerSET_FPENV(Op, DAG);
9024 case ISD::RESET_FPENV:
9025 return lowerRESET_FPENV(Op, DAG);
9026 case ISD::GET_FPMODE:
9027 return lowerGET_FPMODE(Op, DAG);
9028 case ISD::SET_FPMODE:
9029 return lowerSET_FPMODE(Op, DAG);
9030 case ISD::RESET_FPMODE:
9031 return lowerRESET_FPMODE(Op, DAG);
9032 case ISD::EH_DWARF_CFA:
9033 return lowerEH_DWARF_CFA(Op, DAG);
9034 case ISD::VP_MERGE:
9035 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
9036 return lowerVPMergeMask(Op, DAG);
9037 [[fallthrough]];
9038 case ISD::VP_SELECT:
9039 case ISD::VP_ADD:
9040 case ISD::VP_SUB:
9041 case ISD::VP_MUL:
9042 case ISD::VP_SDIV:
9043 case ISD::VP_UDIV:
9044 case ISD::VP_SREM:
9045 case ISD::VP_UREM:
9046 case ISD::VP_UADDSAT:
9047 case ISD::VP_USUBSAT:
9048 case ISD::VP_SADDSAT:
9049 case ISD::VP_SSUBSAT:
9050 case ISD::VP_LRINT:
9051 case ISD::VP_LLRINT:
9052 return lowerVPOp(Op, DAG);
9053 case ISD::VP_AND:
9054 case ISD::VP_OR:
9055 case ISD::VP_XOR:
9056 return lowerLogicVPOp(Op, DAG);
9057 case ISD::VP_FADD:
9058 case ISD::VP_FSUB:
9059 case ISD::VP_FMUL:
9060 case ISD::VP_FDIV:
9061 case ISD::VP_FNEG:
9062 case ISD::VP_FABS:
9063 case ISD::VP_SQRT:
9064 case ISD::VP_FMA:
9065 case ISD::VP_FMINNUM:
9066 case ISD::VP_FMAXNUM:
9067 case ISD::VP_FCOPYSIGN:
9068 if (isPromotedOpNeedingSplit(Op, Subtarget))
9069 return SplitVPOp(Op, DAG);
9070 [[fallthrough]];
9071 case ISD::VP_SRA:
9072 case ISD::VP_SRL:
9073 case ISD::VP_SHL:
9074 return lowerVPOp(Op, DAG);
9075 case ISD::VP_IS_FPCLASS:
9076 return LowerIS_FPCLASS(Op, DAG);
9077 case ISD::VP_SIGN_EXTEND:
9078 case ISD::VP_ZERO_EXTEND:
9079 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
9080 return lowerVPExtMaskOp(Op, DAG);
9081 return lowerVPOp(Op, DAG);
9082 case ISD::VP_TRUNCATE:
9083 return lowerVectorTruncLike(Op, DAG);
9084 case ISD::VP_FP_EXTEND:
9085 case ISD::VP_FP_ROUND:
9086 return lowerVectorFPExtendOrRoundLike(Op, DAG);
9087 case ISD::VP_SINT_TO_FP:
9088 case ISD::VP_UINT_TO_FP:
9089 if (Op.getValueType().isVector() &&
9090 ((Op.getValueType().getScalarType() == MVT::f16 &&
9091 (Subtarget.hasVInstructionsF16Minimal() &&
9092 !Subtarget.hasVInstructionsF16())) ||
9093 Op.getValueType().getScalarType() == MVT::bf16)) {
9094 if (isPromotedOpNeedingSplit(Op, Subtarget))
9095 return SplitVectorOp(Op, DAG);
9096 // int -> f32
9097 SDLoc DL(Op);
9098 MVT NVT =
9099 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
9100 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
9101 // f32 -> [b]f16
9102 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
9103 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9104 }
9105 [[fallthrough]];
9106 case ISD::VP_FP_TO_SINT:
9107 case ISD::VP_FP_TO_UINT:
9108 if (SDValue Op1 = Op.getOperand(0);
9109 Op1.getValueType().isVector() &&
9110 ((Op1.getValueType().getScalarType() == MVT::f16 &&
9111 (Subtarget.hasVInstructionsF16Minimal() &&
9112 !Subtarget.hasVInstructionsF16())) ||
9113 Op1.getValueType().getScalarType() == MVT::bf16)) {
9114 if (isPromotedOpNeedingSplit(Op1, Subtarget))
9115 return SplitVectorOp(Op, DAG);
9116 // [b]f16 -> f32
9117 SDLoc DL(Op);
9118 MVT NVT = MVT::getVectorVT(MVT::f32,
9119 Op1.getValueType().getVectorElementCount());
9120 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
9121 // f32 -> int
9122 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
9123 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
9124 }
9125 return lowerVPFPIntConvOp(Op, DAG);
9126 case ISD::VP_SETCC:
9127 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
9128 return SplitVPOp(Op, DAG);
9129 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
9130 return lowerVPSetCCMaskOp(Op, DAG);
9131 [[fallthrough]];
9132 case ISD::VP_SMIN:
9133 case ISD::VP_SMAX:
9134 case ISD::VP_UMIN:
9135 case ISD::VP_UMAX:
9136 case ISD::VP_BITREVERSE:
9137 case ISD::VP_BSWAP:
9138 return lowerVPOp(Op, DAG);
9139 case ISD::VP_CTLZ:
9140 case ISD::VP_CTLZ_ZERO_UNDEF:
9141 if (Subtarget.hasStdExtZvbb())
9142 return lowerVPOp(Op, DAG);
9143 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
9144 case ISD::VP_CTTZ:
9145 case ISD::VP_CTTZ_ZERO_UNDEF:
9146 if (Subtarget.hasStdExtZvbb())
9147 return lowerVPOp(Op, DAG);
9148 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
9149 case ISD::VP_CTPOP:
9150 return lowerVPOp(Op, DAG);
9151 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
9152 return lowerVPStridedLoad(Op, DAG);
9153 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
9154 return lowerVPStridedStore(Op, DAG);
9155 case ISD::VP_FCEIL:
9156 case ISD::VP_FFLOOR:
9157 case ISD::VP_FRINT:
9158 case ISD::VP_FNEARBYINT:
9159 case ISD::VP_FROUND:
9160 case ISD::VP_FROUNDEVEN:
9161 case ISD::VP_FROUNDTOZERO:
9162 if (isPromotedOpNeedingSplit(Op, Subtarget))
9163 return SplitVPOp(Op, DAG);
9164 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
9165 case ISD::VP_FMAXIMUM:
9166 case ISD::VP_FMINIMUM:
9167 if (isPromotedOpNeedingSplit(Op, Subtarget))
9168 return SplitVPOp(Op, DAG);
9169 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
9170 case ISD::EXPERIMENTAL_VP_SPLICE:
9171 return lowerVPSpliceExperimental(Op, DAG);
9172 case ISD::EXPERIMENTAL_VP_REVERSE:
9173 return lowerVPReverseExperimental(Op, DAG);
9174 case ISD::CLEAR_CACHE: {
9175 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
9176 "llvm.clear_cache only needs custom lower on Linux targets");
9177 SDLoc DL(Op);
9178 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9179 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
9180 Op.getOperand(2), Flags, DL);
9181 }
9183 return lowerDYNAMIC_STACKALLOC(Op, DAG);
9185 return lowerINIT_TRAMPOLINE(Op, DAG);
9187 return lowerADJUST_TRAMPOLINE(Op, DAG);
9191 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
9192 }
9193}
9194
9195SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
9196 SDValue Start, SDValue End,
9197 SDValue Flags, SDLoc DL) const {
9198 MakeLibCallOptions CallOptions;
9199 std::pair<SDValue, SDValue> CallResult =
9200 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
9201 {Start, End, Flags}, CallOptions, DL, InChain);
9202
9203 // This function returns void so only the out chain matters.
9204 return CallResult.second;
9205}
9206
// Lower ISD::INIT_TRAMPOLINE: write a small code+data stub into the
// caller-provided buffer so that calling the buffer invokes the nested
// function with the static chain loaded into t2/X7. Only implemented for
// RV64 (the stub uses LD and 8-byte slots). Returns the chain after the
// stores plus a CLEAR_CACHE node covering the instruction bytes.
9207SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
9208                                                  SelectionDAG &DAG) const {
9209  if (!Subtarget.is64Bit())
9210    llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9211
9212  // Create an MCCodeEmitter to encode instructions.
9213  TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
9214  assert(TLO);
9215  MCContext &MCCtx = TLO->getContext();
9216
9217  std::unique_ptr<MCCodeEmitter> CodeEmitter(
9218      createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
9219
9220  SDValue Root = Op.getOperand(0);
9221  SDValue Trmp = Op.getOperand(1); // trampoline
9222  SDLoc dl(Op);
9223
9224  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
9225
9226  // We store in the trampoline buffer the following instructions and data.
9227  // Offset:
9228  // 0: auipc t2, 0
9229  // 4: ld t0, 24(t2)
9230  // 8: ld t2, 16(t2)
9231  // 12: jalr t0
9232  // 16: <StaticChainOffset>
9233  // 24: <FunctionAddressOffset>
9234  // 32:
9235  // Offset with branch control flow protection enabled:
9236  // 0: lpad <imm20>
9237  // 4: auipc t3, 0
9238  // 8: ld t2, 28(t3)
9239  // 12: ld t3, 20(t3)
9240  // 16: jalr t2
9241  // 20: <StaticChainOffset>
9242  // 28: <FunctionAddressOffset>
9243  // 36:
9244
9245  const bool HasCFBranch =
9246      Subtarget.hasStdExtZicfilp() &&
      // NOTE(review): the elided operand here presumably queries the current
      // function's "cf-protection-branch" attribute — confirm upstream.
9248                                       "cf-protection-branch");
  // With Zicfilp an extra lpad landing-pad word is emitted first, shifting
  // the data slots by one word.
9249  const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
9250  const unsigned StaticChainOffset = StaticChainIdx * 4;
9251  const unsigned FunctionAddressOffset = StaticChainOffset + 8;
9252
9253  const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
9254  assert(STI);
  // Encode one MCInst and return its 32-bit little-endian instruction word.
  // (The local byte/fixup buffers passed to encodeInstruction were declared
  // on lines elided from this extract.)
9255  auto GetEncoding = [&](const MCInst &MC) {
9258    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
9259    uint32_t Encoding = support::endian::read32le(CB.data());
9260    return Encoding;
9261  };
9262
9263  SmallVector<SDValue> OutChains;
9264
9265  SmallVector<uint32_t> Encodings;
9266  if (!HasCFBranch) {
9267    Encodings.append(
9268        {// auipc t2, 0
9269         // Loads the current PC into t2.
9270         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
9271         // ld t0, 24(t2)
9272         // Loads the function address into t0. Note that we are using offsets
9273         // pc-relative to the first instruction of the trampoline.
9274         GetEncoding(MCInstBuilder(RISCV::LD)
9275                         .addReg(RISCV::X5)
9276                         .addReg(RISCV::X7)
9277                         .addImm(FunctionAddressOffset)),
9278         // ld t2, 16(t2)
9279         // Load the value of the static chain.
9280         GetEncoding(MCInstBuilder(RISCV::LD)
9281                         .addReg(RISCV::X7)
9282                         .addReg(RISCV::X7)
9283                         .addImm(StaticChainOffset)),
9284         // jalr t0
9285         // Jump to the function.
9286         GetEncoding(MCInstBuilder(RISCV::JALR)
9287                         .addReg(RISCV::X0)
9288                         .addReg(RISCV::X5)
9289                         .addImm(0))});
9290  } else {
9291    Encodings.append(
9292        {// auipc x0, <imm20> (lpad <imm20>)
9293         // Landing pad.
9294         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
9295         // auipc t3, 0
9296         // Loads the current PC into t3.
9297         GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
9298         // ld t2, (FunctionAddressOffset - 4)(t3)
9299         // Loads the function address into t2. Note that we are using offsets
9300         // pc-relative to the SECOND instruction of the trampoline.
9301         GetEncoding(MCInstBuilder(RISCV::LD)
9302                         .addReg(RISCV::X7)
9303                         .addReg(RISCV::X28)
9304                         .addImm(FunctionAddressOffset - 4)),
9305         // ld t3, (StaticChainOffset - 4)(t3)
9306         // Load the value of the static chain.
9307         GetEncoding(MCInstBuilder(RISCV::LD)
9308                         .addReg(RISCV::X28)
9309                         .addReg(RISCV::X28)
9310                         .addImm(StaticChainOffset - 4)),
9311         // jalr t2
9312         // Software-guarded jump to the function.
9313         GetEncoding(MCInstBuilder(RISCV::JALR)
9314                         .addReg(RISCV::X0)
9315                         .addReg(RISCV::X7)
9316                         .addImm(0))});
9317  }
9318
9319  // Store encoded instructions.
  // Each 32-bit word is truncstored at Trmp + 4*Idx.
9320  for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
9321    SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9322                                         DAG.getConstant(Idx * 4, dl, MVT::i64))
9323                           : Trmp;
9324    OutChains.push_back(DAG.getTruncStore(
9325        Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
9326        MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
9327  }
9328
9329  // Now store the variable part of the trampoline.
9330  SDValue FunctionAddress = Op.getOperand(2);
9331  SDValue StaticChain = Op.getOperand(3);
9332
9333  // Store the given static chain and function pointer in the trampoline buffer.
9334  struct OffsetValuePair {
9335    const unsigned Offset;
9336    const SDValue Value;
9337    SDValue Addr = SDValue(); // Used to cache the address.
9338  } OffsetValues[] = {
9339      {StaticChainOffset, StaticChain},
9340      {FunctionAddressOffset, FunctionAddress},
9341  };
9342  for (auto &OffsetValue : OffsetValues) {
9343    SDValue Addr =
9344        DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9345                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
9346    OffsetValue.Addr = Addr;
9347    OutChains.push_back(
9348        DAG.getStore(Root, dl, OffsetValue.Value, Addr,
9349                     MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
9350  }
9351
  // StaticChainIdx instruction words + 2 data stores.
9352  assert(OutChains.size() == StaticChainIdx + 2 &&
9353         "Size of OutChains mismatch");
9354  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
9355
9356  // The end of instructions of trampoline is the same as the static chain
9357  // address that we computed earlier.
9358  SDValue EndOfTrmp = OffsetValues[0].Addr;
9359
9360  // Call clear cache on the trampoline instructions.
9361  SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
9362                              Trmp, EndOfTrmp);
9363
9364  return Chain;
9365}
9366
9367SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
9368 SelectionDAG &DAG) const {
9369 if (!Subtarget.is64Bit())
9370 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9371
9372 return Op.getOperand(0);
9373}
9374
// Lower a PARTIAL_REDUCE_*MLA node (i32 accumulator, i8 multiplicands) to the
// corresponding VL-predicated zvdot4a8i dot-product node, converting fixed
// vectors through the scalable container type as needed.
9375SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
9376                                                      SelectionDAG &DAG) const {
9377  // Currently, only the vdota4 and vdota4u case (from zvdot4a8i) should be
9378  // legal.
9379  // TODO: There are many other sub-cases we could potentially lower, are
9380  // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
9381  SDLoc DL(Op);
9382  MVT VT = Op.getSimpleValueType();
9383  SDValue Accum = Op.getOperand(0);
9384  assert(Accum.getSimpleValueType() == VT &&
9385         VT.getVectorElementType() == MVT::i32);
9386  SDValue A = Op.getOperand(1);
9387  SDValue B = Op.getOperand(2);
9388  MVT ArgVT = A.getSimpleValueType();
9389  assert(ArgVT == B.getSimpleValueType() &&
9390         ArgVT.getVectorElementType() == MVT::i8);
  // ArgVT is only used by the assertion above.
9391  (void)ArgVT;
9392
9393  // The zvdot4a8i pseudos are defined with sources and destination both
9394  // being i32. This cast is needed for correctness to avoid incorrect
9395  // .vx matching of i8 splats.
9396  A = DAG.getBitcast(VT, A);
9397  B = DAG.getBitcast(VT, B);
9398
9399  MVT ContainerVT = VT;
9400  if (VT.isFixedLengthVector()) {
9401    ContainerVT = getContainerForFixedLengthVector(VT);
9402    Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
9403    A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
9404    B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
9405  }
9406
  // Map the generic partial-reduce opcode to the matching RISCVISD dot node.
  // NOTE(review): the case labels were elided in this extract; presumably
  // the signed/unsigned/signed-unsigned PARTIAL_REDUCE MLA opcodes — confirm.
9407  unsigned Opc;
9408  switch (Op.getOpcode()) {
9410    Opc = RISCVISD::VDOTA4_VL;
9411    break;
9413    Opc = RISCVISD::VDOTA4U_VL;
9414    break;
9416    Opc = RISCVISD::VDOTA4SU_VL;
9417    break;
9418  default:
9419    llvm_unreachable("Unexpected opcode");
9420  }
9421  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9422  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
9423  if (VT.isFixedLengthVector())
9424    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
9425  return Res;
9426}
9427
// getTargetNode overload: wrap a global-address node as a TargetGlobalAddress
// carrying the given relocation flags. (The first signature line was elided
// in this extract.)
9429                            SelectionDAG &DAG, unsigned Flags) {
9430  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
9431}
9432
// getTargetNode overload: wrap a block-address node, preserving its offset and
// attaching the given relocation flags.
9434                            SelectionDAG &DAG, unsigned Flags) {
9435  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
9436                                   Flags);
9437}
9438
// getTargetNode overload: wrap a constant-pool node, preserving alignment and
// offset and attaching the given relocation flags.
9440                            SelectionDAG &DAG, unsigned Flags) {
9441  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9442                                   N->getOffset(), Flags);
9443}
9444
// getTargetNode overload: wrap a jump-table node with the given flags.
9446                            SelectionDAG &DAG, unsigned Flags) {
9447  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
9448}
9449
// Large code model: place the symbol's address in the constant pool and load
// it via an LLA-addressed pool entry (8-byte aligned slot).
// NOTE(review): the RISCVConstantPoolValue creation and the
// MachinePointerInfo argument of getLoad were elided in this extract.
9451                                    EVT Ty, SelectionDAG &DAG) {
9453  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9454  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9455  return DAG.getLoad(
9456      Ty, DL, DAG.getEntryNode(), LC,
9458}
9459
// Large code model: same constant-pool indirection as getLargeGlobalAddress,
// but keyed on an external symbol name instead of a GlobalValue.
9461                                      EVT Ty, SelectionDAG &DAG) {
9463      RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
9464  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9465  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9466  return DAG.getLoad(
9467      Ty, DL, DAG.getEntryNode(), LC,
9469}
9470
// Shared address-lowering helper for global addresses, block addresses,
// constant pools and jump tables. Strategy depends on PIC-ness, HWASAN
// tagged-globals mode and the code model; IsLocal/IsExternWeak refine the
// choice for symbols (GOT-indirect for non-local or extern-weak).
9471template <class NodeTy>
9472SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
9473                                     bool IsLocal, bool IsExternWeak) const {
9474  SDLoc DL(N);
9475  EVT Ty = getPointerTy(DAG.getDataLayout());
9476
9477  // When HWASAN is used and tagging of global variables is enabled
9478  // they should be accessed via the GOT, since the tagged address of a global
9479  // is incompatible with existing code models. This also applies to non-pic
9480  // mode.
9481  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
9482    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9483    if (IsLocal && !Subtarget.allowTaggedGlobals())
9484      // Use PC-relative addressing to access the symbol. This generates the
9485      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
9486      // %pcrel_lo(auipc)).
9487      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9488
9489    // Use PC-relative addressing to access the GOT for this symbol, then load
9490    // the address from the GOT. This generates the pattern (PseudoLGA sym),
9491    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9492    SDValue Load =
9493        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9494    MachineFunction &MF = DAG.getMachineFunction();
    // Attach a memory operand describing the GOT load so later passes know
    // its memory behavior. NOTE(review): the pointer-info/flags arguments
    // were elided in this extract.
9495    MachineMemOperand *MemOp = MF.getMachineMemOperand(
9499        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9500    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9501    return Load;
9502  }
9503
9504  switch (getTargetMachine().getCodeModel()) {
9505  default:
9506    reportFatalUsageError("Unsupported code model for lowering");
9507  case CodeModel::Small: {
9508    // Generate a sequence for accessing addresses within the first 2 GiB of
9509    // address space.
9510    if (Subtarget.hasVendorXqcili()) {
9511      // Use QC.E.LI to generate the address, as this is easier to relax than
9512      // LUI/ADDI.
9513      SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9514      return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
9515    }
9516
9517    // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
9518    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
9519    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
9520    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9521    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
9522  }
9523  case CodeModel::Medium: {
9524    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9525    if (IsExternWeak) {
9526      // An extern weak symbol may be undefined, i.e. have value 0, which may
9527      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
9528      // symbol. This generates the pattern (PseudoLGA sym), which expands to
9529      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9530      SDValue Load =
9531          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9532      MachineFunction &MF = DAG.getMachineFunction();
      // Same GOT-load memory operand as in the PIC path above.
9533      MachineMemOperand *MemOp = MF.getMachineMemOperand(
9537          LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9538      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9539      return Load;
9540    }
9541
9542    // Generate a sequence for accessing addresses within any 2GiB range within
9543    // the address space. This generates the pattern (PseudoLLA sym), which
9544    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
9545    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9546  }
9547  case CodeModel::Large: {
9548    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
9549      return getLargeGlobalAddress(G, DL, Ty, DAG);
9550
9551    // Using pc-relative mode for other node type.
9552    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9553    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9554  }
9555  }
9556}
9557
9558SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
9559 SelectionDAG &DAG) const {
9560 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9561 assert(N->getOffset() == 0 && "unexpected offset in global node");
9562 const GlobalValue *GV = N->getGlobal();
9563 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(GV);
9564 return getAddr(N, DAG, IsLocal, GV->hasExternalWeakLinkage());
9565}
9566
9567SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
9568 SelectionDAG &DAG) const {
9569 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
9570
9571 return getAddr(N, DAG);
9572}
9573
9574SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
9575 SelectionDAG &DAG) const {
9576 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
9577
9578 return getAddr(N, DAG);
9579}
9580
9581SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
9582 SelectionDAG &DAG) const {
9583 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
9584
9585 return getAddr(N, DAG);
9586}
9587
// Lower a static-model TLS access. With UseGOT, emit the initial-exec
// sequence (PseudoLA_TLS_IE load from the GOT plus tp add); otherwise emit
// the local-exec lui/add-tprel/addi sequence relative to tp (X4).
9588SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
9589                                              SelectionDAG &DAG,
9590                                              bool UseGOT) const {
9591  SDLoc DL(N);
9592  EVT Ty = getPointerTy(DAG.getDataLayout());
9593  const GlobalValue *GV = N->getGlobal();
9594  MVT XLenVT = Subtarget.getXLenVT();
9595
9596  if (UseGOT) {
9597    // Use PC-relative addressing to access the GOT for this TLS symbol, then
9598    // load the address from the GOT and add the thread pointer. This generates
9599    // the pattern (PseudoLA_TLS_IE sym), which expands to
9600    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9601    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9602    SDValue Load =
9603        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9604    MachineFunction &MF = DAG.getMachineFunction();
    // Describe the GOT load's memory access. NOTE(review): the pointer-info
    // and flags arguments were elided in this extract.
9605    MachineMemOperand *MemOp = MF.getMachineMemOperand(
9609        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9610    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9611
9612    // Add the thread pointer.
9613    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9614    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9615  }
9616
9617  // Generate a sequence for accessing the address relative to the thread
9618  // pointer, with the appropriate adjustment for the thread pointer offset.
9619  // This generates the pattern
9620  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // NOTE(review): the three target-global-address operands (presumably with
  // %tprel_hi / %tprel_add / %tprel_lo flags) were elided in this extract.
9621  SDValue AddrHi =
9623  SDValue AddrAdd =
9625  SDValue AddrLo =
9627
9628  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9629  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9630  SDValue MNAdd =
9631      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9632  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9633}
9634
// Lower a general/local-dynamic TLS access: compute the GD GOT slot address
// with PseudoLA_TLS_GD, then call __tls_get_addr on it and return the call's
// result (the variable's address).
9635SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9636                                               SelectionDAG &DAG) const {
9637  SDLoc DL(N);
9638  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
9639  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9640  const GlobalValue *GV = N->getGlobal();
9641
9642  // Use a PC-relative addressing mode to access the global dynamic GOT address.
9643  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9644  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9645  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9646  SDValue Load =
9647      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9648
9649  // Prepare argument list to generate call.
  // (The Args declaration line was elided in this extract.)
9651  Args.emplace_back(Load, CallTy);
9652
9653  // Setup call to __tls_get_addr.
9654  TargetLowering::CallLoweringInfo CLI(DAG);
9655  CLI.setDebugLoc(DL)
9656      .setChain(DAG.getEntryNode())
9657      .setLibCallee(CallingConv::C, CallTy,
9658                    DAG.getExternalSymbol("__tls_get_addr", Ty),
9659                    std::move(Args));
9660
  // first = the call's return value (the TLS address).
9661  return LowerCallTo(CLI).first;
9662}
9663
9664SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9665 SelectionDAG &DAG) const {
9666 SDLoc DL(N);
9667 EVT Ty = getPointerTy(DAG.getDataLayout());
9668 const GlobalValue *GV = N->getGlobal();
9669
9670 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9671 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9672 //
9673 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9674 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9675 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9676 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9677 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9678 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9679}
9680
// Dispatch TLS lowering: emulated TLS goes through the generic emulated
// model; otherwise pick the static (LE/IE), TLSDESC or __tls_get_addr
// sequence based on the TLS model chosen by the target machine.
9681SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9682                                                   SelectionDAG &DAG) const {
9683  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9684  assert(N->getOffset() == 0 && "unexpected offset in global node");
9685
9686  if (DAG.getTarget().useEmulatedTLS())
9687    return LowerToTLSEmulatedModel(N, DAG);
9688
  // NOTE(review): the TLS-model query and the GHC calling-convention check
  // were elided from this extract (only the diagnostic below survives).
9690
9693      reportFatalUsageError("In GHC calling convention TLS is not supported");
9694
9695  SDValue Addr;
  // NOTE(review): case labels elided; presumably the LocalExec / InitialExec
  // and dynamic TLS models, in that order — confirm upstream.
9696  switch (Model) {
9698    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9699    break;
9701    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9702    break;
9705    Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9706                                        : getDynamicTLSAddr(N, DAG);
9707    break;
9708  }
9709
9710  return Addr;
9711}
9712
9713// Return true if Val is equal to (setcc LHS, RHS, CC).
9714// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9715// Otherwise, return std::nullopt.
9716static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9717                                      ISD::CondCode CC, SDValue Val) {
  // Val must itself be a SETCC; pull apart its operands and condition.
9718  assert(Val->getOpcode() == ISD::SETCC);
9719  SDValue LHS2 = Val.getOperand(0);
9720  SDValue RHS2 = Val.getOperand(1);
9721  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9722
  // Same operands in the same order: compare conditions directly.
9723  if (LHS == LHS2 && RHS == RHS2) {
9724    if (CC == CC2)
9725      return true;
9726    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9727      return false;
  // Operands appear swapped in Val. NOTE(review): a line normalizing CC2 for
  // the swapped operand order was elided in this extract — confirm upstream
  // before relying on this branch's comparison.
9728  } else if (LHS == RHS2 && RHS == LHS2) {
9730    if (CC == CC2)
9731      return true;
9732    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9733      return false;
9734  }
9735
  // Neither equal nor provably inverse.
9736  return std::nullopt;
9737}
9738
  // True iff V is a ConstantSDNode whose value fits a signed 12-bit
  // immediate — the range accepted by RISC-V I-type instructions like ADDI.
9740  return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9741}
9742
// Try to lower (select CondV, TrueV, FalseV) into plain bitwise/arithmetic
// ops (OR/AND/XOR/ADD/SHL_ADD of the i1 condition), avoiding a branch or a
// conditional-move. Returns SDValue() when no pattern applies. (The first
// line of the signature was elided in this extract.)
9744                             const RISCVSubtarget &Subtarget) {
9745  SDValue CondV = N->getOperand(0);
9746  SDValue TrueV = N->getOperand(1);
9747  SDValue FalseV = N->getOperand(2);
9748  MVT VT = N->getSimpleValueType(0);
9749  SDLoc DL(N);
9750
  // With conditional-move fusion these expansions are not profitable.
9751  if (!Subtarget.hasConditionalMoveFusion()) {
9752    // (select c, -1, y) -> -c | y
9753    if (isAllOnesConstant(TrueV)) {
9754      SDValue Neg = DAG.getNegative(CondV, DL, VT);
9755      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9756    }
9757    // (select c, y, -1) -> (c-1) | y
9758    if (isAllOnesConstant(FalseV)) {
9759      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9760                                DAG.getAllOnesConstant(DL, VT));
9761      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9762    }
9763
9764    const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9765
9766    // (select c, 0, y) -> (c-1) & y
    // With czero-like instructions available, only do this when the non-zero
    // arm is a simm12 (cheap to rematerialize); otherwise czero wins.
9767    if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9768      SDValue Neg =
9769          DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9770      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9771    }
9772    if (isNullConstant(FalseV)) {
9773      // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9774      if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9775        uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9776        if (isPowerOf2_64(TrueM1)) {
9777          unsigned ShAmount = Log2_64(TrueM1);
9778          if (Subtarget.hasShlAdd(ShAmount))
9779            return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9780                               DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9781        }
9782      }
9783      // (select c, y, 0) -> -c & y
9784      if (!HasCZero || isSimm12Constant(TrueV)) {
9785        SDValue Neg = DAG.getNegative(CondV, DL, VT);
9786        return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9787      }
9788    }
9789  }
9790
9791  // select c, ~x, x --> xor -c, x
9792  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9793    const APInt &TrueVal = TrueV->getAsAPIntVal();
9794    const APInt &FalseVal = FalseV->getAsAPIntVal();
9795    if (~TrueVal == FalseVal) {
9796      SDValue Neg = DAG.getNegative(CondV, DL, VT);
9797      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9798    }
9799  }
9800
9801  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9802  // when both truev and falsev are also setcc.
9803  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9804      FalseV.getOpcode() == ISD::SETCC) {
9805    SDValue LHS = CondV.getOperand(0);
9806    SDValue RHS = CondV.getOperand(1);
9807    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9808
9809    // (select x, x, y) -> x | y
9810    // (select !x, x, y) -> x & y
9811    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9812      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9813                         DAG.getFreeze(FalseV));
9814    }
9815    // (select x, y, x) -> x & y
9816    // (select !x, y, x) -> x | y
9817    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9818      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9819                         DAG.getFreeze(TrueV), FalseV);
9820    }
9821  }
9822
  // No profitable expansion found.
9823  return SDValue();
9824}
9825
9826// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9827// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9828// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9829// being `0` or `-1`. In such cases we can replace `select` with `and`.
9830// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9831// than `c0`?
// (See header comment above.) One operand of BO is a single-use SELECT with
// one constant arm; fold BO into both arms when the constant arm folds to
// 0 or -1, letting the select become cheap bitwise logic later. (The
// signature's second line was elided in this extract.)
9832static SDValue
9834                                const RISCVSubtarget &Subtarget) {
  // Short-forward-branch cores prefer keeping the select; don't transform.
9835  if (Subtarget.hasShortForwardBranchIALU())
9836    return SDValue();
9837
  // Locate the single-use SELECT operand of BO (try operand 0, then 1).
9838  unsigned SelOpNo = 0;
9839  SDValue Sel = BO->getOperand(0);
9840  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9841    SelOpNo = 1;
9842    Sel = BO->getOperand(1);
9843  }
9844
9845  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9846    return SDValue();
9847
  // Identify which select arm is the constant and which is the variable one.
9848  unsigned ConstSelOpNo = 1;
9849  unsigned OtherSelOpNo = 2;
9850  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9851    ConstSelOpNo = 2;
9852    OtherSelOpNo = 1;
9853  }
9854  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9855  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9856  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9857    return SDValue();
9858
  // The other operand of BO must also be a (non-opaque) constant.
9859  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9860  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9861  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9862    return SDValue();
9863
9864  SDLoc DL(Sel);
9865  EVT VT = BO->getValueType(0);
9866
  // Constant-fold binOp(constArm, c1), preserving BO's operand order.
9867  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9868  if (SelOpNo == 1)
9869    std::swap(NewConstOps[0], NewConstOps[1]);
9870
9871  SDValue NewConstOp =
9872      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9873  if (!NewConstOp)
9874    return SDValue();
9875
  // Only profitable when the folded constant is 0 or -1 (select then lowers
  // to AND/OR logic).
9876  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9877  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9878    return SDValue();
9879
  // Apply BO to the variable arm, again preserving operand order.
9880  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9881  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9882  if (SelOpNo == 1)
9883    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9884  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9885
  // Rebuild the select with each arm replaced by binOp(arm, c1).
9886  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9887  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9888  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9889}
9890
9891SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9892 SDValue CondV = Op.getOperand(0);
9893 SDValue TrueV = Op.getOperand(1);
9894 SDValue FalseV = Op.getOperand(2);
9895 SDLoc DL(Op);
9896 MVT VT = Op.getSimpleValueType();
9897 MVT XLenVT = Subtarget.getXLenVT();
9898
9899 // Handle P extension packed types by bitcasting to XLenVT for selection,
9900 // e.g. select i1 %cond, <2 x i16> %TrueV, <2 x i16> %FalseV
9901 // These types fit in a single GPR so can use the same selection mechanism
9902 // as scalars.
9903 if (Subtarget.isPExtPackedType(VT)) {
9904 SDValue TrueVInt = DAG.getBitcast(XLenVT, TrueV);
9905 SDValue FalseVInt = DAG.getBitcast(XLenVT, FalseV);
9906 SDValue ResultInt =
9907 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9908 return DAG.getBitcast(VT, ResultInt);
9909 }
9910
9911 // Lower vector SELECTs to VSELECTs by splatting the condition.
9912 if (VT.isVector()) {
9913 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9914 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9915 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9916 }
9917
9918 // Try some other optimizations before falling back to generic lowering.
9919 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9920 return V;
9921
9922 // When there is no cost for GPR <-> FPR, we can use zicond select for
9923 // floating value when CondV is int type
9924 bool FPinGPR = Subtarget.hasStdExtZfinx();
9925
9926 // We can handle FGPR without spliting into hi/lo parts
9927 bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
9928 Subtarget.getXLenVT().getSizeInBits());
9929
9930 bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
9931 VT.isFloatingPoint() && FitsInGPR;
9932
9933 if (UseZicondForFPSel) {
9934
9935 auto CastToInt = [&](SDValue V) -> SDValue {
9936 // Treat +0.0 as int 0 to enable single 'czero' instruction generation.
9937 if (isNullFPConstant(V))
9938 return DAG.getConstant(0, DL, XLenVT);
9939
9940 if (VT == MVT::f16)
9941 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
9942
9943 if (VT == MVT::f32 && Subtarget.is64Bit())
9944 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
9945
9946 return DAG.getBitcast(XLenVT, V);
9947 };
9948
9949 SDValue TrueVInt = CastToInt(TrueV);
9950 SDValue FalseVInt = CastToInt(FalseV);
9951
9952 // Emit integer SELECT (lowers to Zicond)
9953 SDValue ResultInt =
9954 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9955
9956 // Convert back to floating VT
9957 if (VT == MVT::f32 && Subtarget.is64Bit())
9958 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
9959
9960 if (VT == MVT::f16)
9961 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
9962
9963 return DAG.getBitcast(VT, ResultInt);
9964 }
9965
9966 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9967 // nodes to implement the SELECT. Performing the lowering here allows for
9968 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9969 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9970 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9971
9972 // (select c, t, 0) -> (czero_eqz t, c)
9973 if (isNullConstant(FalseV))
9974 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9975 // (select c, 0, f) -> (czero_nez f, c)
9976 if (isNullConstant(TrueV))
9977 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9978
9979 // Check to see if a given operation is a 'NOT', if so return the negated
9980 // operand
9981 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9982 using namespace llvm::SDPatternMatch;
9983 SDValue Xor;
9984 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9985 return Xor;
9986 }
9987 return std::nullopt;
9988 };
9989 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9990 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9991 if (TrueV.getOpcode() == ISD::AND &&
9992 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9993 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9994 ? getNotOperand(TrueV.getOperand(1))
9995 : getNotOperand(TrueV.getOperand(0));
9996 if (NotOperand) {
9997 SDValue CMOV =
9998 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9999 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
10000 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
10001 }
10002 return DAG.getNode(
10003 ISD::OR, DL, VT, TrueV,
10004 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
10005 }
10006
10007 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
10008 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
10009 if (FalseV.getOpcode() == ISD::AND &&
10010 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
10011 auto NotOperand = (FalseV.getOperand(0) == TrueV)
10012 ? getNotOperand(FalseV.getOperand(1))
10013 : getNotOperand(FalseV.getOperand(0));
10014 if (NotOperand) {
10015 SDValue CMOV =
10016 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
10017 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
10018 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
10019 }
10020 return DAG.getNode(
10021 ISD::OR, DL, VT, FalseV,
10022 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
10023 }
10024
10025 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
10026 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
10027 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
10028 const APInt &TrueVal = TrueV->getAsAPIntVal();
10029 const APInt &FalseVal = FalseV->getAsAPIntVal();
10030
10031 // Prefer these over Zicond to avoid materializing an immediate:
10032 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
10033 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
10034 if (CondV.getOpcode() == ISD::SETCC &&
10035 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
10036 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10037 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
10038 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
10039 int64_t TrueImm = TrueVal.getSExtValue();
10040 int64_t FalseImm = FalseVal.getSExtValue();
10041 if (CCVal == ISD::SETGT)
10042 std::swap(TrueImm, FalseImm);
10043 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
10044 isInt<12>(TrueImm - FalseImm)) {
10045 SDValue SRA =
10046 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
10047 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
10048 SDValue AND =
10049 DAG.getNode(ISD::AND, DL, VT, SRA,
10050 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
10051 return DAG.getNode(ISD::ADD, DL, VT, AND,
10052 DAG.getSignedConstant(FalseImm, DL, VT));
10053 }
10054 }
10055 }
10056
10057 // Use SHL/ADDI (and possible XORI) to avoid having to materialize
10058 // a constant in register
10059 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
10060 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
10061 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
10062 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
10063 }
10064 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
10065 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
10066 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
10067 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
10068 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
10069 }
10070
10071 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
10072 const int DeltaCost = RISCVMatInt::getIntMatCost(
10073 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
10074 // Does the addend fold into an ADDI
10075 if (Addend.isSignedIntN(12))
10076 return DeltaCost;
10077 const int AddendCost = RISCVMatInt::getIntMatCost(
10078 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
10079 return AddendCost + DeltaCost;
10080 };
10081 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
10082 getCost(TrueVal - FalseVal, FalseVal);
10083 SDValue LHSVal = DAG.getConstant(
10084 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
10085 SDValue CMOV =
10086 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
10087 DL, VT, LHSVal, CondV);
10088 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
10089 }
10090
10091 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
10092 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
10093 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
10094 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
10095 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
10096 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
10097 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
10098 // Efficient only if the constant and its negation fit into `ADDI`
10099 // Prefer Add/Sub over Xor since can be compressed for small immediates
10100 if (isInt<12>(RawConstVal)) {
10101 // Fall back to XORI if Const == -0x800 since we don't have SUBI.
10102 unsigned SubOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::SUB;
10103 unsigned AddOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::ADD;
10104 SDValue SubOp = DAG.getNode(SubOpc, DL, VT, RegV, ConstVal);
10105 SDValue CZERO =
10106 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
10107 DL, VT, SubOp, CondV);
10108 return DAG.getNode(AddOpc, DL, VT, CZERO, ConstVal);
10109 }
10110 }
10111
10112 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
10113 // Unless we have the short forward branch optimization.
10114 if (!Subtarget.hasConditionalMoveFusion())
10115 return DAG.getNode(
10116 ISD::OR, DL, VT,
10117 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
10118 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
10120 }
10121
10122 if (Op.hasOneUse()) {
10123 unsigned UseOpc = Op->user_begin()->getOpcode();
10124 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
10125 SDNode *BinOp = *Op->user_begin();
10126 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
10127 DAG, Subtarget)) {
10128 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
10129 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
10130 // may return a constant node and cause crash in lowerSELECT.
10131 if (NewSel.getOpcode() == ISD::SELECT)
10132 return lowerSELECT(NewSel, DAG);
10133 return NewSel;
10134 }
10135 }
10136 }
10137
10138 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
10139 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
10140 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
10141 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
10142 if (FPTV && FPFV) {
10143 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
10144 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
10145 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
10146 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
10147 DAG.getConstant(1, DL, XLenVT));
10148 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
10149 }
10150 }
10151
10152 // If the condition is not an integer SETCC which operates on XLenVT, we need
10153 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
10154 // (select condv, truev, falsev)
10155 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
10156 if (CondV.getOpcode() != ISD::SETCC ||
10157 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
10158 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10159 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
10160
10161 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
10162
10163 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
10164 }
10165
10166 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
10167 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
10168 // advantage of the integer compare+branch instructions. i.e.:
10169 // (select (setcc lhs, rhs, cc), truev, falsev)
10170 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
10171 SDValue LHS = CondV.getOperand(0);
10172 SDValue RHS = CondV.getOperand(1);
10173 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10174
10175 // Special case for a select of 2 constants that have a difference of 1.
10176 // Normally this is done by DAGCombine, but if the select is introduced by
10177 // type legalization or op legalization, we miss it. Restricting to SETLT
10178 // case for now because that is what signed saturating add/sub need.
10179 // FIXME: We don't need the condition to be SETLT or even a SETCC,
10180 // but we would probably want to swap the true/false values if the condition
10181 // is SETGE/SETLE to avoid an XORI.
10182 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
10183 CCVal == ISD::SETLT) {
10184 const APInt &TrueVal = TrueV->getAsAPIntVal();
10185 const APInt &FalseVal = FalseV->getAsAPIntVal();
10186 if (TrueVal - 1 == FalseVal)
10187 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
10188 if (TrueVal + 1 == FalseVal)
10189 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
10190 }
10191
10192 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10193 // 1 < x ? x : 1 -> 0 < x ? x : 1
10194 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
10195 RHS == TrueV && LHS == FalseV) {
10196 LHS = DAG.getConstant(0, DL, VT);
10197 // 0 <u x is the same as x != 0.
10198 if (CCVal == ISD::SETULT) {
10199 std::swap(LHS, RHS);
10200 CCVal = ISD::SETNE;
10201 }
10202 }
10203
10204 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
10205 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
10206 RHS == FalseV) {
10207 RHS = DAG.getConstant(0, DL, VT);
10208 }
10209
10210 SDValue TargetCC = DAG.getCondCode(CCVal);
10211
10212 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
10213 // (select (setcc lhs, rhs, CC), constant, falsev)
10214 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
10215 std::swap(TrueV, FalseV);
10216 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
10217 }
10218
10219 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
10220 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
10221}
10222
10223SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
10224 SDValue CondV = Op.getOperand(1);
10225 SDLoc DL(Op);
10226 MVT XLenVT = Subtarget.getXLenVT();
10227
10228 if (CondV.getOpcode() == ISD::SETCC &&
10229 CondV.getOperand(0).getValueType() == XLenVT) {
10230 SDValue LHS = CondV.getOperand(0);
10231 SDValue RHS = CondV.getOperand(1);
10232 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10233
10234 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10235
10236 SDValue TargetCC = DAG.getCondCode(CCVal);
10237 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10238 LHS, RHS, TargetCC, Op.getOperand(2));
10239 }
10240
10241 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10242 CondV, DAG.getConstant(0, DL, XLenVT),
10243 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
10244}
10245
10246SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
10247 MachineFunction &MF = DAG.getMachineFunction();
10248 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
10249
10250 SDLoc DL(Op);
10251 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
10253
10254 // vastart just stores the address of the VarArgsFrameIndex slot into the
10255 // memory location argument.
10256 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10257 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
10258 MachinePointerInfo(SV));
10259}
10260
10261SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
10262 SelectionDAG &DAG) const {
10263 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10264 MachineFunction &MF = DAG.getMachineFunction();
10265 MachineFrameInfo &MFI = MF.getFrameInfo();
10266 MFI.setFrameAddressIsTaken(true);
10267 Register FrameReg = RI.getFrameRegister(MF);
10268 int XLenInBytes = Subtarget.getXLen() / 8;
10269
10270 EVT VT = Op.getValueType();
10271 SDLoc DL(Op);
10272 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
10273 unsigned Depth = Op.getConstantOperandVal(0);
10274 while (Depth--) {
10275 int Offset = -(XLenInBytes * 2);
10276 SDValue Ptr = DAG.getNode(
10277 ISD::ADD, DL, VT, FrameAddr,
10279 FrameAddr =
10280 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
10281 }
10282 return FrameAddr;
10283}
10284
10285SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
10286 SelectionDAG &DAG) const {
10287 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10288 MachineFunction &MF = DAG.getMachineFunction();
10289 MachineFrameInfo &MFI = MF.getFrameInfo();
10290 MFI.setReturnAddressIsTaken(true);
10291 MVT XLenVT = Subtarget.getXLenVT();
10292 int XLenInBytes = Subtarget.getXLen() / 8;
10293
10294 EVT VT = Op.getValueType();
10295 SDLoc DL(Op);
10296 unsigned Depth = Op.getConstantOperandVal(0);
10297 if (Depth) {
10298 int Off = -XLenInBytes;
10299 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
10300 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
10301 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
10302 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
10303 MachinePointerInfo());
10304 }
10305
10306 // Return the value of the return address register, marking it an implicit
10307 // live-in.
10308 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
10309 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
10310}
10311
// Lower SHL_PARTS: shift the 2*XLEN-wide value {Hi, Lo} left by Shamt and
// return both result halves via getMergeValues ({Lo, Hi} order). Shamt may be
// anything in [0, 2*XLEN); both the P-extension and generic paths handle the
// Shamt >= XLEN case explicitly.
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();
  unsigned XLen = Subtarget.getXLen();

  // With P extension, use SLX (FSHL) for the high part.
  if (Subtarget.hasStdExtP()) {
    // HiRes = fshl(Hi, Lo, Shamt) - correct when Shamt < XLen
    SDValue HiRes = DAG.getNode(ISD::FSHL, DL, VT, Hi, Lo, Shamt);
    // LoRes = Lo << Shamt - correct Lo when Shamt < XLen,
    // Mask shift amount to avoid UB when Shamt >= XLen.
    SDValue ShamtMasked =
        DAG.getNode(ISD::AND, DL, VT, Shamt, DAG.getConstant(XLen - 1, DL, VT));
    SDValue LoRes = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMasked);

    // Create a mask that is -1 when Shamt >= XLen, 0 otherwise.
    // FIXME: We should use a select and let LowerSelect make the
    // optimizations.
    // The bit of Shamt that distinguishes Shamt >= XLen is moved into the
    // sign position and then smeared across the whole word.
    SDValue ShAmtExt =
        DAG.getNode(ISD::SHL, DL, VT, Shamt,
                    DAG.getConstant(XLen - Log2_32(XLen) - 1, DL, VT));
    SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, ShAmtExt,
                               DAG.getConstant(XLen - 1, DL, VT));

    // When Shamt >= XLen: HiRes = LoRes, LoRes = 0
    // HiRes = (HiRes & ~Mask) | (LoRes & Mask)
    SDValue HiMasked =
        DAG.getNode(ISD::AND, DL, VT, HiRes, DAG.getNOT(DL, Mask, VT));
    SDValue LoMasked = DAG.getNode(ISD::AND, DL, VT, LoRes, Mask);
    // The two masked halves cannot have overlapping set bits, so mark the OR
    // as disjoint.
    HiRes =
        DAG.getNode(ISD::OR, DL, VT, HiMasked, LoMasked, SDNodeFlags::Disjoint);

    // LoRes = LoRes & ~Mask (clear when Shamt >= XLen)
    LoRes = DAG.getNode(ISD::AND, DL, VT, LoRes, DAG.getNOT(DL, Mask, VT));

    return DAG.getMergeValues({LoRes, HiRes}, DL);
  }

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getSignedConstant(-(int)XLen, DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(XLen - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  // The (>>u 1) >>u (XLEN-1 - Shamt) split keeps each individual shift amount
  // in [0, XLEN) even when Shamt == 0.
  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
10384
10385SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
10386 bool IsSRA) const {
10387 SDLoc DL(Op);
10388 SDValue Lo = Op.getOperand(0);
10389 SDValue Hi = Op.getOperand(1);
10390 SDValue Shamt = Op.getOperand(2);
10391 EVT VT = Lo.getValueType();
10392
10393 // With P extension, use NSRL/NSRA for RV32 or FSHR (SRX) for RV64.
10394 if (Subtarget.hasStdExtP()) {
10395 unsigned XLen = Subtarget.getXLen();
10396
10397 SDValue LoRes;
10398 if (Subtarget.is64Bit()) {
10399 // On RV64, use FSHR (SRX instruction) for the low part. We will need
10400 // to fix this later if ShAmt >= 64.
10401 LoRes = DAG.getNode(ISD::FSHR, DL, VT, Hi, Lo, Shamt);
10402 } else {
10403 // On RV32, use NSRL/NSRA for the low part.
10404 // NSRL/NSRA read 6 bits of shift amount, so they handle Shamt >= 32
10405 // correctly.
10406 LoRes = DAG.getNode(IsSRA ? RISCVISD::NSRA : RISCVISD::NSRL, DL, VT, Lo,
10407 Hi, Shamt);
10408 }
10409
10410 // Mask shift amount to avoid UB when Shamt >= XLen.
10411 SDValue ShamtMasked =
10412 DAG.getNode(ISD::AND, DL, VT, Shamt, DAG.getConstant(XLen - 1, DL, VT));
10413 SDValue HiRes =
10414 DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, ShamtMasked);
10415
10416 // Create a mask that is -1 when Shamt >= XLen, 0 otherwise.
10417 // FIXME: We should use a select and let LowerSelect make the
10418 // optimizations.
10419 SDValue ShAmtExt =
10420 DAG.getNode(ISD::SHL, DL, VT, Shamt,
10421 DAG.getConstant(XLen - Log2_32(XLen) - 1, DL, VT));
10422 SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, ShAmtExt,
10423 DAG.getConstant(XLen - 1, DL, VT));
10424
10425 if (Subtarget.is64Bit()) {
10426 // On RV64, FSHR masks shift amount to 63. We need to replace LoRes
10427 // with HiRes when Shamt >= 64.
10428 // LoRes = (LoRes & ~Mask) | (HiRes & Mask)
10429 SDValue LoMasked =
10430 DAG.getNode(ISD::AND, DL, VT, LoRes, DAG.getNOT(DL, Mask, VT));
10431 SDValue HiMasked = DAG.getNode(ISD::AND, DL, VT, HiRes, Mask);
10432 LoRes = DAG.getNode(ISD::OR, DL, VT, LoMasked, HiMasked,
10434 }
10435
10436 // If ShAmt >= XLen, we need to replace HiRes with 0 or sign bits.
10437 if (IsSRA) {
10438 // sra hi, hi, (mask & (XLen-1)) - shifts by XLen-1 when shamt >= XLen
10439 SDValue MaskAmt = DAG.getNode(ISD::AND, DL, VT, Mask,
10440 DAG.getConstant(XLen - 1, DL, VT));
10441 HiRes = DAG.getNode(ISD::SRA, DL, VT, HiRes, MaskAmt);
10442 } else {
10443 // andn hi, hi, mask - clears hi when shamt >= XLen
10444 HiRes = DAG.getNode(ISD::AND, DL, VT, HiRes, DAG.getNOT(DL, Mask, VT));
10445 }
10446
10447 return DAG.getMergeValues({LoRes, HiRes}, DL);
10448 }
10449
10450 // SRA expansion:
10451 // if Shamt-XLEN < 0: // Shamt < XLEN
10452 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10453 // Hi = Hi >>s Shamt
10454 // else:
10455 // Lo = Hi >>s (Shamt-XLEN);
10456 // Hi = Hi >>s (XLEN-1)
10457 //
10458 // SRL expansion:
10459 // if Shamt-XLEN < 0: // Shamt < XLEN
10460 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10461 // Hi = Hi >>u Shamt
10462 // else:
10463 // Lo = Hi >>u (Shamt-XLEN);
10464 // Hi = 0;
10465
10466 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
10467
10468 SDValue Zero = DAG.getConstant(0, DL, VT);
10469 SDValue One = DAG.getConstant(1, DL, VT);
10470 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10471 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10472 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10473 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10474
10475 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
10476 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
10477 SDValue ShiftLeftHi =
10478 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
10479 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
10480 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
10481 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
10482 SDValue HiFalse =
10483 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
10484
10485 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10486
10487 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
10488 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10489
10490 SDValue Parts[2] = {Lo, Hi};
10491 return DAG.getMergeValues(Parts, DL);
10492}
10493
10494// Lower splats of i1 types to SETCC. For each mask vector type, we have a
10495// legal equivalently-sized i8 type, so we can use that as a go-between.
10496SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
10497 SelectionDAG &DAG) const {
10498 SDLoc DL(Op);
10499 MVT VT = Op.getSimpleValueType();
10500 SDValue SplatVal = Op.getOperand(0);
10501 // All-zeros or all-ones splats are handled specially.
10502 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
10503 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10504 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
10505 }
10506 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
10507 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10508 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
10509 }
10510 MVT InterVT = VT.changeVectorElementType(MVT::i8);
10511 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
10512 DAG.getConstant(1, DL, SplatVal.getValueType()));
10513 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
10514 SDValue Zero = DAG.getConstant(0, DL, InterVT);
10515 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
10516}
10517
10518// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
10519// illegal (currently only vXi64 RV32).
10520// FIXME: We could also catch non-constant sign-extended i32 values and lower
10521// them to VMV_V_X_VL.
10522SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
10523 SelectionDAG &DAG) const {
10524 SDLoc DL(Op);
10525 MVT VecVT = Op.getSimpleValueType();
10526 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
10527 "Unexpected SPLAT_VECTOR_PARTS lowering");
10528
10529 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
10530 SDValue Lo = Op.getOperand(0);
10531 SDValue Hi = Op.getOperand(1);
10532
10533 MVT ContainerVT = VecVT;
10534 if (VecVT.isFixedLengthVector())
10535 ContainerVT = getContainerForFixedLengthVector(VecVT);
10536
10537 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10538
10539 SDValue Res =
10540 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
10541
10542 if (VecVT.isFixedLengthVector())
10543 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
10544
10545 return Res;
10546}
10547
10548// Custom-lower extensions from mask vectors by using a vselect either with 1
10549// for zero/any-extension or -1 for sign-extension:
10550// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
10551// Note that any-extension is lowered identically to zero-extension.
10552SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
10553 int64_t ExtTrueVal) const {
10554 SDLoc DL(Op);
10555 MVT VecVT = Op.getSimpleValueType();
10556 SDValue Src = Op.getOperand(0);
10557 // Only custom-lower extensions from mask types
10558 assert(Src.getValueType().isVector() &&
10559 Src.getValueType().getVectorElementType() == MVT::i1);
10560
10561 if (VecVT.isScalableVector()) {
10562 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
10563 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
10564 if (Src.getOpcode() == ISD::XOR &&
10565 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
10566 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
10567 SplatTrueVal);
10568 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
10569 }
10570
10571 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
10572 MVT I1ContainerVT =
10573 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10574
10575 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
10576
10577 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10578
10579 MVT XLenVT = Subtarget.getXLenVT();
10580 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10581 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
10582
10583 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10584 SDValue Xor = Src.getOperand(0);
10585 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
10586 SDValue ScalableOnes = Xor.getOperand(1);
10587 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
10588 ScalableOnes.getOperand(0).isUndef() &&
10590 ScalableOnes.getOperand(1).getNode())) {
10591 CC = Xor.getOperand(0);
10592 std::swap(SplatZero, SplatTrueVal);
10593 }
10594 }
10595 }
10596
10597 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10598 DAG.getUNDEF(ContainerVT), SplatZero, VL);
10599 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10600 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
10601 SDValue Select =
10602 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
10603 SplatZero, DAG.getUNDEF(ContainerVT), VL);
10604
10605 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
10606}
10607
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
// Handles both plain TRUNCATE and VP_TRUNCATE (which carries explicit mask
// and VL operands).
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  // VP_TRUNCATE supplies its own mask and VL operands.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP truncates use the default all-ones mask and full VL.
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  // (and vec, 1) != 0 yields the low bit of each element as an i1 vector.
  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
10662
10663SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
10664 SelectionDAG &DAG) const {
10665 unsigned Opc = Op.getOpcode();
10666 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
10667 SDLoc DL(Op);
10668
10669 MVT VT = Op.getSimpleValueType();
10670 // Only custom-lower vector truncates
10671 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10672
10673 // Truncates to mask types are handled differently
10674 if (VT.getVectorElementType() == MVT::i1)
10675 return lowerVectorMaskTruncLike(Op, DAG);
10676
10677 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
10678 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
10679 // truncate by one power of two at a time.
10680 MVT DstEltVT = VT.getVectorElementType();
10681
10682 SDValue Src = Op.getOperand(0);
10683 MVT SrcVT = Src.getSimpleValueType();
10684 MVT SrcEltVT = SrcVT.getVectorElementType();
10685
10686 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
10687 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
10688 "Unexpected vector truncate lowering");
10689
10690 MVT ContainerVT = SrcVT;
10691 SDValue Mask, VL;
10692 if (IsVPTrunc) {
10693 Mask = Op.getOperand(1);
10694 VL = Op.getOperand(2);
10695 }
10696 if (SrcVT.isFixedLengthVector()) {
10697 ContainerVT = getContainerForFixedLengthVector(SrcVT);
10698 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10699 if (IsVPTrunc) {
10700 MVT MaskVT = getMaskTypeFor(ContainerVT);
10701 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10702 }
10703 }
10704
10705 SDValue Result = Src;
10706 if (!IsVPTrunc) {
10707 std::tie(Mask, VL) =
10708 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10709 }
10710
10711 unsigned NewOpc;
10713 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
10714 else if (Opc == ISD::TRUNCATE_USAT_U)
10715 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
10716 else
10717 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
10718
10719 do {
10720 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
10721 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
10722 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
10723 } while (SrcEltVT != DstEltVT);
10724
10725 if (SrcVT.isFixedLengthVector())
10726 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10727
10728 return Result;
10729}
10730
// Lower STRICT_FP_EXTEND / STRICT_FP_ROUND on vectors. RVV converts only
// between adjacent FP widths, so f64 <-> f16/bf16 goes through an f32
// intermediate; for narrowing, the intermediate uses round-to-odd to avoid
// double rounding. The chain is threaded through every step to preserve the
// strict-FP ordering.
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Operand 0 is the chain for strict-FP nodes.
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       (SrcVT.getVectorElementType() == MVT::f16 ||
        SrcVT.getVectorElementType() == MVT::bf16)) ||
      ((VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16) &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
10779
// Lower FP_EXTEND / FP_ROUND / VP_FP_EXTEND / VP_FP_ROUND on vectors. Direct
// conversions (one width step) may be left alone for scalable non-VP nodes or
// emitted as a single *_VL node; f64 <-> f16/bf16 takes two steps through an
// f32 intermediate (round-to-odd when narrowing).
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  // A conversion is "direct" when it is NOT one of the two-step cases
  // (f16/bf16 -> f64 extend, or f64 -> f16/bf16 round).
  bool IsDirectExtend =
      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                   (SrcVT.getVectorElementType() != MVT::f16 &&
                    SrcVT.getVectorElementType() != MVT::bf16));
  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
                                      VT.getVectorElementType() != MVT::bf16) ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // We have regular SD node patterns for direct non-VL extends.
  if (VT.isScalableVector() && IsDirectConv && !IsVP)
    return Op;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP nodes use the default all-ones mask and full VL.
  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  // Two-step conversion through f32; narrowing uses round-to-odd for the
  // intermediate to avoid double rounding.
  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
10855
10856// Given a scalable vector type and an index into it, returns the type for the
10857// smallest subvector that the index fits in. This can be used to reduce LMUL
10858// for operations like vslidedown.
10859//
10860// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10861static std::optional<MVT>
10862getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10863 const RISCVSubtarget &Subtarget) {
10864 assert(VecVT.isScalableVector());
10865 const unsigned EltSize = VecVT.getScalarSizeInBits();
10866 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10867 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10868 MVT SmallerVT;
10869 if (MaxIdx < MinVLMAX)
10870 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10871 else if (MaxIdx < MinVLMAX * 2)
10872 SmallerVT =
10874 else if (MaxIdx < MinVLMAX * 4)
10875 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10878 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10879 return std::nullopt;
10880 return SmallerVT;
10881}
10882
10884 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10885 if (!IdxC || isNullConstant(Idx))
10886 return false;
10887 return isUInt<5>(IdxC->getZExtValue());
10888}
10889
10890// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10891// first position of a vector, and that vector is slid up to the insert index.
10892// By limiting the active vector length to index+1 and merging with the
10893// original vector (with an undisturbed tail policy for elements >= VL), we
10894// achieve the desired result of leaving all elements untouched except the one
10895// at VL-1, which is replaced with the desired value.
10896SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10897 SelectionDAG &DAG) const {
10898 SDLoc DL(Op);
10899 MVT VecVT = Op.getSimpleValueType();
10900 MVT XLenVT = Subtarget.getXLenVT();
10901 SDValue Vec = Op.getOperand(0);
10902 SDValue Val = Op.getOperand(1);
10903 MVT ValVT = Val.getSimpleValueType();
10904 SDValue Idx = Op.getOperand(2);
10905
// i1 vectors: widen to i8, insert there, and truncate back to the mask type.
10906 if (VecVT.getVectorElementType() == MVT::i1) {
10907 // FIXME: For now we just promote to an i8 vector and insert into that,
10908 // but this is probably not optimal.
10909 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10910 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10911 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10912 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10913 }
10914
// f16/bf16 without native vector support for that element type: move the
// scalar to a GPR and perform the insert on the integer-typed vector.
10915 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10916 (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10917 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10918 MVT IntVT = VecVT.changeTypeToInteger();
10919 SDValue IntInsert = DAG.getNode(
10920 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10921 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10922 return DAG.getBitcast(VecVT, IntInsert);
10923 }
10924
// Packed-SIMD (P extension) path: the small fixed-length vector is held in a
// single GPR, so the insert is done with scalar shift/merge operations.
// Requires a constant index; returning SDValue() falls back to the default
// lowering below / generic expansion.
10925 if (Subtarget.hasStdExtP() && VecVT.isFixedLengthVector()) {
10926 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10927 if (!IdxC)
10928 return SDValue();
10929
10930 unsigned IdxVal = IdxC->getZExtValue();
10931 unsigned NumElts = VecVT.getVectorNumElements();
10932 MVT EltVT = VecVT.getVectorElementType();
10933 Vec = DAG.getBitcast(XLenVT, Vec);
10934 SDValue ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10935
10936 // For 2-element vectors, BUILD_VECTOR is more efficient since it only needs
10937 // at most 2 instructions.
10938 if (NumElts == 2) {
10939 unsigned EltBits = EltVT.getSizeInBits();
10940 SDValue Elt0, Elt1;
10941 if (IdxVal == 0) {
10942 Elt0 = ExtVal;
10943 Elt1 = DAG.getNode(ISD::SRL, DL, XLenVT, Vec,
10944 DAG.getConstant(EltBits, DL, XLenVT));
10945 } else {
10946 Elt0 = Vec;
10947 Elt1 = ExtVal;
10948 }
10949 return DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Elt0, Elt1);
10950 }
10951
10952 // For 4/8-element vectors, use MVM(or MERGE) instruction which does bitwise
10953 // select: rd = (~mask & rd) | (mask & rs1).
10954 // This generates: slli + lui/li + mvm
10955 if (NumElts == 4 || NumElts == 8) {
10956 unsigned EltBits = EltVT.getSizeInBits();
10957 unsigned ShiftAmt = IdxVal * EltBits;
// PosMask has ones exactly over the destination element's bit positions.
10958 uint64_t PosMask = ((1ULL << EltBits) - 1) << ShiftAmt;
10959
10960 SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, XLenVT, ExtVal,
10961 DAG.getConstant(ShiftAmt, DL, XLenVT));
10962 SDValue Mask = DAG.getConstant(PosMask, DL, XLenVT);
10963 SDValue Result =
10964 DAG.getNode(RISCVISD::MERGE, DL, XLenVT, Mask, Vec, ShiftedVal);
10965 return DAG.getBitcast(VecVT, Result);
10966 }
10967
10968 return SDValue();
10969 }
10970
10971 MVT ContainerVT = VecVT;
10972 // If the operand is a fixed-length vector, convert to a scalable one.
10973 if (VecVT.isFixedLengthVector()) {
10974 ContainerVT = getContainerForFixedLengthVector(VecVT);
10975 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10976 }
10977
10978 // If we know the index we're going to insert at, we can shrink Vec so that
10979 // we're performing the scalar inserts and slideup on a smaller LMUL.
// AlignedIdx, when set, is the element offset of the shrunken subvector
// within OrigVec; the result is re-inserted there before returning.
10980 SDValue OrigVec = Vec;
10981 std::optional<unsigned> AlignedIdx;
10982 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10983 const unsigned OrigIdx = IdxC->getZExtValue();
10984 // Do we know an upper bound on LMUL?
10985 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10986 DL, DAG, Subtarget)) {
10987 ContainerVT = *ShrunkVT;
10988 AlignedIdx = 0;
10989 }
10990
10991 // If we're compiling for an exact VLEN value, we can always perform
10992 // the insert in m1 as we can determine the register corresponding to
10993 // the index in the register group.
10994 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10995 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10996 EVT ElemVT = VecVT.getVectorElementType();
10997 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10998 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10999 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
11000 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
11001 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
11002 ContainerVT = M1VT;
11003 }
11004
11005 if (AlignedIdx)
11006 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
11007 }
11008
11009 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
11010 // Even i64-element vectors on RV32 can be lowered without scalar
11011 // legalization if the most-significant 32 bits of the value are not affected
11012 // by the sign-extension of the lower 32 bits. This applies to i32 constants
11013 // and sign_extend of i32 values.
11014 if (!IsLegalInsert) {
11015 if (isa<ConstantSDNode>(Val)) {
11016 const auto *CVal = cast<ConstantSDNode>(Val);
11017 if (isInt<32>(CVal->getSExtValue())) {
11018 IsLegalInsert = true;
11019 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
11020 }
11021 } else if (Val.getOpcode() == ISD::SIGN_EXTEND &&
11022 Val.getOperand(0).getValueType() == MVT::i32) {
11023 IsLegalInsert = true;
11024 Val = Val.getOperand(0);
11025 }
11026 }
11027
11028 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11029
11030 SDValue ValInVec;
11031
11032 if (IsLegalInsert) {
11033 unsigned Opc =
11034 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
// Index 0 needs no slideup: vmv.s.x/vfmv.s.f writes element 0 directly.
11035 if (isNullConstant(Idx)) {
11036 if (!VecVT.isFloatingPoint())
11037 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
11038 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
11039
11040 if (AlignedIdx)
11041 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
11042 if (!VecVT.isFixedLengthVector())
11043 return Vec;
11044 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
11045 }
11046
11047 // Use ri.vinsert.v.x if available.
// NOTE(review): the condition tail (line 11049) and the policy-operand
// expression (line 11052) were elided in extraction - they gate on a
// small constant index and select the tail policy; confirm upstream.
11048 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
11050 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
11051 SDValue PolicyOp =
11053 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
11054 VL, PolicyOp);
11055 if (AlignedIdx)
11056 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
11057 if (!VecVT.isFixedLengthVector())
11058 return Vec;
11059 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
11060 }
11061
11062 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
11063 } else {
11064 // On RV32, i64-element vectors must be specially handled to place the
11065 // value at element 0, by using two vslide1down instructions in sequence on
11066 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
11067 // this.
11068 SDValue ValLo, ValHi;
11069 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
11070 MVT I32ContainerVT =
11071 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
11072 SDValue I32Mask =
11073 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
11074 // Limit the active VL to two.
11075 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
11076 // If the Idx is 0 we can insert directly into the vector.
11077 if (isNullConstant(Idx)) {
11078 // First slide in the lo value, then the hi in above it. We use slide1down
11079 // to avoid the register group overlap constraint of vslide1up.
11080 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11081 Vec, Vec, ValLo, I32Mask, InsertI64VL);
11082 // If the source vector is undef don't pass along the tail elements from
11083 // the previous slide1down.
11084 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
11085 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11086 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
11087 // Bitcast back to the right container type.
11088 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
11089
11090 if (AlignedIdx)
11091 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
11092 if (!VecVT.isFixedLengthVector())
11093 return ValInVec;
11094 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
11095 }
11096
11097 // First slide in the lo value, then the hi in above it. We use slide1down
11098 // to avoid the register group overlap constraint of vslide1up.
11099 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11100 DAG.getUNDEF(I32ContainerVT),
11101 DAG.getUNDEF(I32ContainerVT), ValLo,
11102 I32Mask, InsertI64VL);
11103 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
11104 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
11105 I32Mask, InsertI64VL);
11106 // Bitcast back to the right container type.
11107 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
11108 }
11109
11110 // Now that the value is in a vector, slide it into position.
// VL = Idx + 1 so exactly the elements up to and including the target are
// active; elements at and beyond VL keep the original vector's contents.
11111 SDValue InsertVL =
11112 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
11113
11114 // Use tail agnostic policy if Idx is the last index of Vec.
// NOTE(review): lines 11115/11118 defining `Policy` (tail-undisturbed by
// default, upgraded to tail-agnostic when inserting at the last element)
// were elided in extraction - confirm upstream.
11116 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
11117 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
11119 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
11120 Idx, Mask, InsertVL, Policy);
11121
11122 if (AlignedIdx)
11123 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
11124 if (!VecVT.isFixedLengthVector())
11125 return Slideup;
11126 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
11127}
11128
11129// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
11130// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
11131// types this is done using VMV_X_S to allow us to glean information about the
11132// sign bits of the result.
11133SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
11134 SelectionDAG &DAG) const {
11135 SDLoc DL(Op);
11136 SDValue Idx = Op.getOperand(1);
11137 SDValue Vec = Op.getOperand(0);
11138 EVT EltVT = Op.getValueType();
11139 MVT VecVT = Vec.getSimpleValueType();
11140 MVT XLenVT = Subtarget.getXLenVT();
11141
11142 if (VecVT.getVectorElementType() == MVT::i1) {
11143 // Use vfirst.m to extract the first bit.
// vfirst returns the index of the first set bit, or -1 if none; so element
// 0 is set iff vfirst returns exactly 0.
11144 if (isNullConstant(Idx)) {
11145 MVT ContainerVT = VecVT;
11146 if (VecVT.isFixedLengthVector()) {
11147 ContainerVT = getContainerForFixedLengthVector(VecVT);
11148 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11149 }
11150 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11151 SDValue Vfirst =
11152 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
11153 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
11154 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11155 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
11156 }
// For fixed i1 vectors of >= 8 elements, bitcast the mask to a vector of
// wide integer elements, extract the containing element into a GPR, then
// isolate the requested bit with a shift+and.
11157 if (VecVT.isFixedLengthVector()) {
11158 unsigned NumElts = VecVT.getVectorNumElements();
11159 if (NumElts >= 8) {
11160 MVT WideEltVT;
11161 unsigned WidenVecLen;
11162 SDValue ExtractElementIdx;
11163 SDValue ExtractBitIdx;
11164 unsigned MaxEEW = Subtarget.getELen();
11165 MVT LargestEltVT = MVT::getIntegerVT(
11166 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
// If the whole mask fits in one wide element, extract element 0 and use
// the original index as the bit index.
11167 if (NumElts <= LargestEltVT.getSizeInBits()) {
11168 assert(isPowerOf2_32(NumElts) &&
11169 "the number of elements should be power of 2");
11170 WideEltVT = MVT::getIntegerVT(NumElts);
11171 WidenVecLen = 1;
11172 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
11173 ExtractBitIdx = Idx;
11174 } else {
11175 WideEltVT = LargestEltVT;
11176 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
11177 // extract element index = index / element width
11178 ExtractElementIdx = DAG.getNode(
11179 ISD::SRL, DL, XLenVT, Idx,
11180 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
11181 // mask bit index = index % element width
11182 ExtractBitIdx = DAG.getNode(
11183 ISD::AND, DL, XLenVT, Idx,
11184 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
11185 }
11186 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
11187 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
11188 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
11189 Vec, ExtractElementIdx);
11190 // Extract the bit from GPR.
11191 SDValue ShiftRight =
11192 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
11193 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
11194 DAG.getConstant(1, DL, XLenVT));
11195 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
11196 }
11197 }
11198 // Otherwise, promote to an i8 vector and extract from that.
11199 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11200 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
11201 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
11202 }
11203
// f16/bf16 without native vector support for that element type: extract via
// the same-width integer vector, then move the bits back with fmv.h.x.
11204 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
11205 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
11206 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
11207 MVT IntVT = VecVT.changeTypeToInteger();
11208 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
11209 SDValue IntExtract =
11210 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
11211 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
11212 }
11213
// Packed-SIMD (P extension) path: these fixed vector types fit in a single
// GPR; bitcast and shift the selected element down into the low bits.
11214 if (Subtarget.hasStdExtP() && VecVT.isFixedLengthVector()) {
11215 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
11216 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
11217 return SDValue();
11218 SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
11219 unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
11220 SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
11221 DAG.getConstant(ElemWidth, DL, XLenVT));
11222 return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
11223 }
11224
11225 // If this is a fixed vector, we need to convert it to a scalable vector.
11226 MVT ContainerVT = VecVT;
11227 if (VecVT.isFixedLengthVector()) {
11228 ContainerVT = getContainerForFixedLengthVector(VecVT);
11229 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11230 }
11231
11232 // If we're compiling for an exact VLEN value and we have a known
11233 // constant index, we can always perform the extract in m1 (or
11234 // smaller) as we can determine the register corresponding to
11235 // the index in the register group.
11236 const auto VLen = Subtarget.getRealVLen();
11237 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
11238 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
11239 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
11240 unsigned OrigIdx = IdxC->getZExtValue();
11241 EVT ElemVT = VecVT.getVectorElementType();
11242 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
11243 unsigned RemIdx = OrigIdx % ElemsPerVReg;
11244 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
11245 unsigned ExtractIdx =
11246 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
11247 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
11248 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
11249 ContainerVT = M1VT;
11250 }
11251
11252 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
11253 // contains our index.
11254 std::optional<uint64_t> MaxIdx;
11255 if (VecVT.isFixedLengthVector())
11256 MaxIdx = VecVT.getVectorNumElements() - 1;
11257 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
11258 MaxIdx = IdxC->getZExtValue();
11259 if (MaxIdx) {
11260 if (auto SmallerVT =
11261 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
11262 ContainerVT = *SmallerVT;
11263 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11264 }
11265 }
11266
11267 // Use ri.vextract.x.v if available.
11268 // TODO: Avoid index 0 and just use the vmv.x.s
// NOTE(review): the condition tail (line 11270) was elided in extraction -
// it appears to gate on a valid constant index; confirm upstream.
11269 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
11271 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
11272 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
11273 }
11274
11275 // If after narrowing, the required slide is still greater than LMUL2,
11276 // fallback to generic expansion and go through the stack. This is done
11277 // for a subtle reason: extracting *all* elements out of a vector is
11278 // widely expected to be linear in vector size, but because vslidedown
11279 // is linear in LMUL, performing N extracts using vslidedown becomes
11280 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
11281 // seems to have the same problem (the store is linear in LMUL), but the
11282 // generic expansion *memoizes* the store, and thus for many extracts of
11283 // the same vector we end up with one store and a bunch of loads.
11284 // TODO: We don't have the same code for insert_vector_elt because we
11285 // have BUILD_VECTOR and handle the degenerate case there. Should we
11286 // consider adding an inverse BUILD_VECTOR node?
// NOTE(review): the initializer of LMUL2VT (line 11288) was elided in
// extraction - presumably the LMUL=2 container type used as the size
// threshold; confirm upstream.
11287 MVT LMUL2VT =
11289 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
11290 return SDValue();
11291
11292 // If the index is 0, the vector is already in the right position.
11293 if (!isNullConstant(Idx)) {
11294 // Use a VL of 1 to avoid processing more elements than we need.
11295 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
11296 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11297 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
11298 }
11299
11300 if (!EltVT.isInteger()) {
11301 // Floating-point extracts are handled in TableGen.
11302 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
11303 }
11304
// Integer result: read element 0 via vmv.x.s and truncate to the element
// type (vmv.x.s sign-extends into XLenVT, exposing sign-bit information).
11305 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
11306 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
11307}
11308
11309// Some RVV intrinsics may claim that they want an integer operand to be
11310// promoted or expanded.
// NOTE(review): the first signature line (11311) was elided in extraction;
// per the preceding comment this helper legalizes the scalar operand of RVV
// intrinsics whose scalar type does not match XLenVT: narrow scalars are
// promoted, and i64 scalars on RV32 are split or turned into splats.
11312 const RISCVSubtarget &Subtarget) {
11313 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
11314 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
11315 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
11316 "Unexpected opcode");
11317
11318 if (!Subtarget.hasVInstructions())
11319 return SDValue();
11320
// Chained intrinsics carry the chain as operand 0, shifting the ID by one.
11321 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11322 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11323 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11324
11325 SDLoc DL(Op);
11326
// NOTE(review): the declaration receiving this lookup (line 11327, the
// intrinsic-info pointer `II`) was elided in extraction - confirm upstream.
11328 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11329 if (!II || !II->hasScalarOperand())
11330 return SDValue();
11331
11332 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
11333 assert(SplatOp < Op.getNumOperands());
11334
11335 SmallVector<SDValue, 8> Operands(Op->ops());
11336 SDValue &ScalarOp = Operands[SplatOp];
11337 MVT OpVT = ScalarOp.getSimpleValueType();
11338 MVT XLenVT = Subtarget.getXLenVT();
11339
11340 // If this isn't a scalar, or its type is XLenVT we're done.
11341 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11342 return SDValue();
11343
11344 // Simplest case is that the operand needs to be promoted to XLenVT.
11345 if (OpVT.bitsLT(XLenVT)) {
11346 // If the operand is a constant, sign extend to increase our chances
11347 // of being able to use a .vi instruction. ANY_EXTEND would become a
11348 // zero extend and the simm5 check in isel would fail.
11349 // FIXME: Should we ignore the upper bits in isel instead?
// NOTE(review): the extension-opcode selection (line 11351) was elided in
// extraction - per the comment it picks SIGN_EXTEND for constants and
// ANY_EXTEND otherwise; confirm upstream.
11350 unsigned ExtOpc =
11352 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11353 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11354 }
11355
11356 // Use the previous operand to get the vXi64 VT. The result might be a mask
11357 // VT for compares. Using the previous operand assumes that the previous
11358 // operand will never have a smaller element size than a scalar operand and
11359 // that a widening operation never uses SEW=64.
11360 // NOTE: If this fails the below assert, we can probably just find the
11361 // element count from any operand or result and use it to construct the VT.
11362 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
11363 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
11364
11365 // The more complex case is when the scalar is larger than XLenVT.
11366 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
11367 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
11368
11369 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
11370 // instruction to sign-extend since SEW>XLEN.
11371 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
11372 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
11373 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11374 }
11375
11376 switch (IntNo) {
11377 case Intrinsic::riscv_vslide1up:
11378 case Intrinsic::riscv_vslide1down:
11379 case Intrinsic::riscv_vslide1up_mask:
11380 case Intrinsic::riscv_vslide1down_mask: {
11381 // We need to special case these when the scalar is larger than XLen.
// An i64 slide1up/slide1down on RV32 becomes two SEW=32 slides over a
// double-length i32 view of the vector, inserting the lo/hi halves.
11382 unsigned NumOps = Op.getNumOperands();
11383 bool IsMasked = NumOps == 7;
11384
11385 // Convert the vector source to the equivalent nxvXi32 vector.
11386 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
11387 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
11388 SDValue ScalarLo, ScalarHi;
11389 std::tie(ScalarLo, ScalarHi) =
11390 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
11391
11392 // Double the VL since we halved SEW.
11393 SDValue AVL = getVLOperand(Op);
11394 SDValue I32VL;
11395
11396 // Optimize for constant AVL
11397 if (isa<ConstantSDNode>(AVL)) {
// NOTE(review): the initializer on line 11399 (the VLMAX bounds query)
// was elided in extraction - confirm upstream.
11398 const auto [MinVLMAX, MaxVLMAX] =
11400
11401 uint64_t AVLInt = AVL->getAsZExtVal();
11402 if (AVLInt <= MinVLMAX) {
11403 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
11404 } else if (AVLInt >= 2 * MaxVLMAX) {
11405 // Just set vl to VLMAX in this situation
11406 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
11407 } else {
11408 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
11409 // is related to the hardware implementation.
11410 // So let the following code handle
11411 }
11412 }
// Non-constant or hardware-dependent AVL: ask vsetvli for the real VL at
// the original SEW, then double it for the SEW=32 view.
// NOTE(review): the declaration of `Lmul` (line 11414) was elided in
// extraction - confirm upstream.
11413 if (!I32VL) {
11415 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
11416 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
11417 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
11418 SDValue SETVL =
11419 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
11420 // Using vsetvli instruction to get actually used length which related to
11421 // the hardware implementation
11422 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
11423 SEW, LMUL);
11424 I32VL =
11425 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
11426 }
11427
11428 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
11429
11430 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
11431 // instructions.
11432 SDValue Passthru;
11433 if (IsMasked)
11434 Passthru = DAG.getUNDEF(I32VT);
11435 else
11436 Passthru = DAG.getBitcast(I32VT, Operands[1]);
11437
// slide1up inserts hi first so that lo ends up in the lower half of the
// new i64 element; slide1down inserts lo first for the same reason.
11438 if (IntNo == Intrinsic::riscv_vslide1up ||
11439 IntNo == Intrinsic::riscv_vslide1up_mask) {
11440 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11441 ScalarHi, I32Mask, I32VL);
11442 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11443 ScalarLo, I32Mask, I32VL);
11444 } else {
11445 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11446 ScalarLo, I32Mask, I32VL);
11447 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11448 ScalarHi, I32Mask, I32VL);
11449 }
11450
11451 // Convert back to nxvXi64.
11452 Vec = DAG.getBitcast(VT, Vec);
11453
11454 if (!IsMasked)
11455 return Vec;
11456 // Apply mask after the operation.
11457 SDValue Mask = Operands[NumOps - 3];
11458 SDValue MaskedOff = Operands[1];
11459 // Assume Policy operand is the last operand.
11460 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
11461 // We don't need to select maskedoff if it's undef.
11462 if (MaskedOff.isUndef())
11463 return Vec;
11464 // TAMU
11465 if (Policy == RISCVVType::TAIL_AGNOSTIC)
11466 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11467 DAG.getUNDEF(VT), AVL);
11468 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
11469 // It's fine because vmerge does not care mask policy.
11470 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11471 MaskedOff, AVL);
11472 }
11473 }
11474
11475 // We need to convert the scalar to a splat vector.
11476 SDValue VL = getVLOperand(Op);
11477 assert(VL.getValueType() == XLenVT);
11478 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
11479 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11480}
11481
11482// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
11483// scalable vector llvm.get.vector.length for now.
11484//
11485// We need to convert from a scalable VF to a vsetvli with VLMax equal to
11486// (vscale * VF). The vscale and VF are independent of element width. We use
11487// SEW=8 for the vsetvli because it is the only element width that supports all
11488// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
11489// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
11490// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
11491// SEW and LMUL are better for the surrounding vector instructions.
// NOTE(review): the first signature line (11492) was elided in extraction;
// per the preceding comment this is the llvm.experimental.get.vector.length
// lowering (SDNode *N, SelectionDAG &DAG, ...).
11493 const RISCVSubtarget &Subtarget) {
11494 MVT XLenVT = Subtarget.getXLenVT();
11495
11496 // The smallest LMUL is only valid for the smallest element width.
11497 const unsigned ElementWidth = 8;
11498
11499 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
11500 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
11501 // We don't support VF==1 with ELEN==32.
// MinVF is only referenced by the assert below, hence [[maybe_unused]].
11502 [[maybe_unused]] unsigned MinVF =
11503 RISCV::RVVBitsPerBlock / Subtarget.getELen();
11504
11505 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
11506 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
11507 "Unexpected VF");
11508
// Choose LMUL (possibly fractional) so that with SEW=8 the vsetvli's VLMAX
// equals vscale * VF.
11509 bool Fractional = VF < LMul1VF;
11510 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
11511 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
11512 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
11513
11514 SDLoc DL(N);
11515
11516 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
11517 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
11518
// Operand 1 is the requested count (AVL); widen it to XLenVT for vsetvli.
11519 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
11520
11521 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
11522 SDValue Res =
11523 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
11524 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
11525}
11526
// NOTE(review): the first signature line (11527) was elided in extraction;
// this is the llvm.experimental.cttz.elts lowering (SDNode *N, ...).
// vfirst.m yields the index of the first set mask bit, or -1 when none is
// set. Operand 2 is the intrinsic's is-zero-poison flag: when it is 1 the
// all-zeros result is poison, so the raw vfirst result can be returned;
// otherwise the -1 sentinel must be mapped to the element count.
11528 const RISCVSubtarget &Subtarget) {
11529 SDValue Op0 = N->getOperand(1);
11530 MVT OpVT = Op0.getSimpleValueType();
11531 MVT ContainerVT = OpVT;
11532 if (OpVT.isFixedLengthVector()) {
11533 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
11534 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
11535 }
11536 MVT XLenVT = Subtarget.getXLenVT();
11537 SDLoc DL(N);
11538 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
11539 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
11540 if (isOneConstant(N->getOperand(2)))
11541 return Res;
11542
11543 // Convert -1 to VL.
// A negative vfirst result means no bit was set; select the element count
// in that case, otherwise pass the found index through.
11544 SDValue Setcc =
11545 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
11546 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
11547 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
11548}
11549
// Promote the scalar operand of a VCIX intrinsic to XLenVT in place, if the
// intrinsic has one and it is narrower than XLenVT. Operands is mutated.
11550static inline void promoteVCIXScalar(SDValue Op,
11551 MutableArrayRef<SDValue> Operands,
11552 SelectionDAG &DAG) {
// NOTE(review): the initializer of Subtarget (line 11554) was elided in
// extraction - confirm upstream.
11553 const RISCVSubtarget &Subtarget =
11555
11556 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11557 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11558 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11559 SDLoc DL(Op);
11560
// NOTE(review): the declaration receiving this lookup (line 11561, the
// intrinsic-info pointer `II`) was elided in extraction - confirm upstream.
11562 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11563 if (!II || !II->hasScalarOperand())
11564 return;
11565
11566 unsigned SplatOp = II->ScalarOperand + 1;
11567 assert(SplatOp < Op.getNumOperands());
11568
11569 SDValue &ScalarOp = Operands[SplatOp];
11570 MVT OpVT = ScalarOp.getSimpleValueType();
11571 MVT XLenVT = Subtarget.getXLenVT();
11572
11573 // The code below is partially copied from lowerVectorIntrinsicScalars.
11574 // If this isn't a scalar, or its type is XLenVT we're done.
11575 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11576 return;
11577
11578 // Manually emit promote operation for scalar operation.
11579 if (OpVT.bitsLT(XLenVT)) {
// NOTE(review): the extension-opcode selection (line 11581) was elided in
// extraction - confirm upstream.
11580 unsigned ExtOpc =
11582 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11583 }
11584}
11585
// Normalize the operand list of a VCIX intrinsic in place: first legalize
// the scalar operand, then for every vector operand bitcast floating-point
// vectors to same-width integer vectors and convert fixed-length vectors to
// their scalable container types.
11586static void processVCIXOperands(SDValue OrigOp,
11587 MutableArrayRef<SDValue> Operands,
11588 SelectionDAG &DAG) {
11589 promoteVCIXScalar(OrigOp, Operands, DAG);
// NOTE(review): the initializer of Subtarget (line 11591) was elided in
// extraction - confirm upstream.
11590 const RISCVSubtarget &Subtarget =
11592 for (SDValue &V : Operands) {
11593 EVT ValType = V.getValueType();
11594 if (ValType.isVector() && ValType.isFloatingPoint()) {
11595 MVT InterimIVT =
11596 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
11597 ValType.getVectorElementCount());
11598 V = DAG.getBitcast(InterimIVT, V);
11599 }
// Note: ValType was captured before the bitcast above, so this check still
// reflects the operand's original fixed-length property; the conversion
// itself re-queries V's (possibly bitcast) current type.
11600 if (ValType.isFixedLengthVector()) {
11601 MVT OpContainerVT = getContainerForFixedLengthVector(
11602 DAG, V.getSimpleValueType(), Subtarget);
11603 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
11604 }
11605 }
11606}
11607
11608// LMUL * VLEN should be greater than or equal to EGS * SEW
// Checks the element-group-width constraint described above: the vector's
// guaranteed register-group size (LMUL * min VLEN) must cover one element
// group of EGS elements at the vector's SEW.
11609static inline bool isValidEGW(int EGS, EVT VT,
11610 const RISCVSubtarget &Subtarget) {
// NOTE(review): the LMUL factor expression (line 11612, presumably the
// ratio of the type's size to RVVBitsPerBlock) and the `>=` comparison were
// elided in extraction - confirm upstream.
11611 return (Subtarget.getRealMinVLen() *
11613 EGS * VT.getScalarSizeInBits();
11614}
11615
11616SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11617 SelectionDAG &DAG) const {
11618 unsigned IntNo = Op.getConstantOperandVal(0);
11619 SDLoc DL(Op);
11620 MVT XLenVT = Subtarget.getXLenVT();
11621
11622 switch (IntNo) {
11623 default:
11624 break; // Don't custom lower most intrinsics.
11625 case Intrinsic::riscv_tuple_insert: {
11626 SDValue Vec = Op.getOperand(1);
11627 SDValue SubVec = Op.getOperand(2);
11628 SDValue Index = Op.getOperand(3);
11629
11630 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
11631 SubVec, Index);
11632 }
11633 case Intrinsic::riscv_tuple_extract: {
11634 SDValue Vec = Op.getOperand(1);
11635 SDValue Index = Op.getOperand(2);
11636
11637 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
11638 Index);
11639 }
11640 case Intrinsic::thread_pointer: {
11641 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11642 return DAG.getRegister(RISCV::X4, PtrVT);
11643 }
11644 case Intrinsic::riscv_orc_b:
11645 case Intrinsic::riscv_brev8:
11646 case Intrinsic::riscv_sha256sig0:
11647 case Intrinsic::riscv_sha256sig1:
11648 case Intrinsic::riscv_sha256sum0:
11649 case Intrinsic::riscv_sha256sum1:
11650 case Intrinsic::riscv_sm3p0:
11651 case Intrinsic::riscv_sm3p1: {
11652 unsigned Opc;
11653 switch (IntNo) {
11654 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11655 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11656 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11657 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11658 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11659 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11660 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11661 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11662 }
11663
11664 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11665 }
11666 case Intrinsic::riscv_sm4ks:
11667 case Intrinsic::riscv_sm4ed: {
11668 unsigned Opc =
11669 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11670
11671 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
11672 Op.getOperand(3));
11673 }
11674 case Intrinsic::riscv_zip:
11675 case Intrinsic::riscv_unzip: {
11676 unsigned Opc =
11677 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
11678 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11679 }
11680 case Intrinsic::riscv_mopr:
11681 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
11682 Op.getOperand(2));
11683
11684 case Intrinsic::riscv_moprr: {
11685 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
11686 Op.getOperand(2), Op.getOperand(3));
11687 }
11688 case Intrinsic::riscv_clmulh:
11689 case Intrinsic::riscv_clmulr: {
11690 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
11691 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
11692 }
11693 case Intrinsic::experimental_get_vector_length:
11694 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
11695 case Intrinsic::experimental_cttz_elts:
11696 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
11697 case Intrinsic::riscv_vmv_x_s: {
11698 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
11699 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
11700 }
11701 case Intrinsic::riscv_vfmv_f_s:
11702 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
11703 case Intrinsic::riscv_vmv_v_x:
11704 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
11705 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
11706 Subtarget);
11707 case Intrinsic::riscv_vfmv_v_f:
11708 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
11709 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11710 case Intrinsic::riscv_vmv_s_x: {
11711 SDValue Scalar = Op.getOperand(2);
11712
11713 if (Scalar.getValueType().bitsLE(XLenVT)) {
11714 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
11715 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
11716 Op.getOperand(1), Scalar, Op.getOperand(3));
11717 }
11718
11719 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
11720
11721 // This is an i64 value that lives in two scalar registers. We have to
11722 // insert this in a convoluted way. First we build vXi64 splat containing
11723 // the two values that we assemble using some bit math. Next we'll use
11724 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
11725 // to merge element 0 from our splat into the source vector.
11726 // FIXME: This is probably not the best way to do this, but it is
11727 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
11728 // point.
11729 // sw lo, (a0)
11730 // sw hi, 4(a0)
11731 // vlse vX, (a0)
11732 //
11733 // vid.v vVid
11734 // vmseq.vx mMask, vVid, 0
11735 // vmerge.vvm vDest, vSrc, vVal, mMask
11736 MVT VT = Op.getSimpleValueType();
11737 SDValue Vec = Op.getOperand(1);
11738 SDValue VL = getVLOperand(Op);
11739
11740 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
11741 if (Op.getOperand(1).isUndef())
11742 return SplattedVal;
11743 SDValue SplattedIdx =
11744 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11745 DAG.getConstant(0, DL, MVT::i32), VL);
11746
11747 MVT MaskVT = getMaskTypeFor(VT);
11748 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
11749 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11750 SDValue SelectCond =
11751 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11752 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
11753 DAG.getUNDEF(MaskVT), Mask, VL});
11754 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
11755 Vec, DAG.getUNDEF(VT), VL);
11756 }
11757 case Intrinsic::riscv_vfmv_s_f:
11758 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getValueType(),
11759 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11760 // EGS * EEW >= 128 bits
11761 case Intrinsic::riscv_vaesdf_vv:
11762 case Intrinsic::riscv_vaesdf_vs:
11763 case Intrinsic::riscv_vaesdm_vv:
11764 case Intrinsic::riscv_vaesdm_vs:
11765 case Intrinsic::riscv_vaesef_vv:
11766 case Intrinsic::riscv_vaesef_vs:
11767 case Intrinsic::riscv_vaesem_vv:
11768 case Intrinsic::riscv_vaesem_vs:
11769 case Intrinsic::riscv_vaeskf1:
11770 case Intrinsic::riscv_vaeskf2:
11771 case Intrinsic::riscv_vaesz_vs:
11772 case Intrinsic::riscv_vsm4k:
11773 case Intrinsic::riscv_vsm4r_vv:
11774 case Intrinsic::riscv_vsm4r_vs: {
11775 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11776 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11777 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11778 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11779 return Op;
11780 }
11781 // EGS * EEW >= 256 bits
11782 case Intrinsic::riscv_vsm3c:
11783 case Intrinsic::riscv_vsm3me: {
11784 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
11785 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
11786 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
11787 return Op;
11788 }
11789 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
11790 case Intrinsic::riscv_vsha2ch:
11791 case Intrinsic::riscv_vsha2cl:
11792 case Intrinsic::riscv_vsha2ms: {
11793 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11794 !Subtarget.hasStdExtZvknhb())
11795 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11796 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11797 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11798 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11799 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11800 return Op;
11801 }
11802 case Intrinsic::riscv_sf_vc_v_x:
11803 case Intrinsic::riscv_sf_vc_v_i:
11804 case Intrinsic::riscv_sf_vc_v_xv:
11805 case Intrinsic::riscv_sf_vc_v_iv:
11806 case Intrinsic::riscv_sf_vc_v_vv:
11807 case Intrinsic::riscv_sf_vc_v_fv:
11808 case Intrinsic::riscv_sf_vc_v_xvv:
11809 case Intrinsic::riscv_sf_vc_v_ivv:
11810 case Intrinsic::riscv_sf_vc_v_vvv:
11811 case Intrinsic::riscv_sf_vc_v_fvv:
11812 case Intrinsic::riscv_sf_vc_v_xvw:
11813 case Intrinsic::riscv_sf_vc_v_ivw:
11814 case Intrinsic::riscv_sf_vc_v_vvw:
11815 case Intrinsic::riscv_sf_vc_v_fvw: {
11816 MVT VT = Op.getSimpleValueType();
11817
11818 SmallVector<SDValue> Operands{Op->op_values()};
11819 processVCIXOperands(Op, Operands, DAG);
11820
11821 MVT RetVT = VT;
11822 if (VT.isFixedLengthVector())
11824 else if (VT.isFloatingPoint())
11827
11828 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11829
11830 if (VT.isFixedLengthVector())
11831 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11832 else if (VT.isFloatingPoint())
11833 NewNode = DAG.getBitcast(VT, NewNode);
11834
11835 if (Op == NewNode)
11836 break;
11837
11838 return NewNode;
11839 }
11840 }
11841
11842 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11843}
11844
11846 unsigned Type) {
11847 SDLoc DL(Op);
11848 SmallVector<SDValue> Operands{Op->op_values()};
11849 Operands.erase(Operands.begin() + 1);
11850
11851 const RISCVSubtarget &Subtarget =
11853 MVT VT = Op.getSimpleValueType();
11854 MVT RetVT = VT;
11855 MVT FloatVT = VT;
11856
11857 if (VT.isFloatingPoint()) {
11858 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11859 VT.getVectorElementCount());
11860 FloatVT = RetVT;
11861 }
11862 if (VT.isFixedLengthVector())
11864 Subtarget);
11865
11866 processVCIXOperands(Op, Operands, DAG);
11867
11868 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11869 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11870 SDValue Chain = NewNode.getValue(1);
11871
11872 if (VT.isFixedLengthVector())
11873 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11874 if (VT.isFloatingPoint())
11875 NewNode = DAG.getBitcast(VT, NewNode);
11876
11877 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11878
11879 return NewNode;
11880}
11881
11883 unsigned Type) {
11884 SmallVector<SDValue> Operands{Op->op_values()};
11885 Operands.erase(Operands.begin() + 1);
11886 processVCIXOperands(Op, Operands, DAG);
11887
11888 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11889}
11890
11891static SDValue
11893 const RISCVSubtarget &Subtarget,
11894 SelectionDAG &DAG) {
11895 bool IsStrided;
11896 switch (IntNo) {
11897 case Intrinsic::riscv_seg2_load_mask:
11898 case Intrinsic::riscv_seg3_load_mask:
11899 case Intrinsic::riscv_seg4_load_mask:
11900 case Intrinsic::riscv_seg5_load_mask:
11901 case Intrinsic::riscv_seg6_load_mask:
11902 case Intrinsic::riscv_seg7_load_mask:
11903 case Intrinsic::riscv_seg8_load_mask:
11904 IsStrided = false;
11905 break;
11906 case Intrinsic::riscv_sseg2_load_mask:
11907 case Intrinsic::riscv_sseg3_load_mask:
11908 case Intrinsic::riscv_sseg4_load_mask:
11909 case Intrinsic::riscv_sseg5_load_mask:
11910 case Intrinsic::riscv_sseg6_load_mask:
11911 case Intrinsic::riscv_sseg7_load_mask:
11912 case Intrinsic::riscv_sseg8_load_mask:
11913 IsStrided = true;
11914 break;
11915 default:
11916 llvm_unreachable("unexpected intrinsic ID");
11917 };
11918
11919 static const Intrinsic::ID VlsegInts[7] = {
11920 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11921 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11922 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11923 Intrinsic::riscv_vlseg8_mask};
11924 static const Intrinsic::ID VlssegInts[7] = {
11925 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11926 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11927 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11928 Intrinsic::riscv_vlsseg8_mask};
11929
11930 SDLoc DL(Op);
11931 unsigned NF = Op->getNumValues() - 1;
11932 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11933 MVT XLenVT = Subtarget.getXLenVT();
11934 MVT VT = Op->getSimpleValueType(0);
11935 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11936 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11937 ContainerVT.getScalarSizeInBits();
11938 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11939
11940 // Operands: (chain, int_id, pointer, mask, vl) or
11941 // (chain, int_id, pointer, offset, mask, vl)
11942 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11943 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11944 MVT MaskVT = Mask.getSimpleValueType();
11945 MVT MaskContainerVT =
11946 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11947 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11948
11949 SDValue IntID = DAG.getTargetConstant(
11950 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11951 auto *Load = cast<MemIntrinsicSDNode>(Op);
11952
11953 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11955 Load->getChain(),
11956 IntID,
11957 DAG.getUNDEF(VecTupTy),
11958 Op.getOperand(2),
11959 Mask,
11960 VL,
11963 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11964 // Insert the stride operand.
11965 if (IsStrided)
11966 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11967
11968 SDValue Result =
11970 Load->getMemoryVT(), Load->getMemOperand());
11972 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11973 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11974 Result.getValue(0),
11975 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11976 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11977 }
11978 Results.push_back(Result.getValue(1));
11979 return DAG.getMergeValues(Results, DL);
11980}
11981
11982SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11983 SelectionDAG &DAG) const {
11984 unsigned IntNo = Op.getConstantOperandVal(1);
11985 switch (IntNo) {
11986 default:
11987 break;
11988 case Intrinsic::riscv_seg2_load_mask:
11989 case Intrinsic::riscv_seg3_load_mask:
11990 case Intrinsic::riscv_seg4_load_mask:
11991 case Intrinsic::riscv_seg5_load_mask:
11992 case Intrinsic::riscv_seg6_load_mask:
11993 case Intrinsic::riscv_seg7_load_mask:
11994 case Intrinsic::riscv_seg8_load_mask:
11995 case Intrinsic::riscv_sseg2_load_mask:
11996 case Intrinsic::riscv_sseg3_load_mask:
11997 case Intrinsic::riscv_sseg4_load_mask:
11998 case Intrinsic::riscv_sseg5_load_mask:
11999 case Intrinsic::riscv_sseg6_load_mask:
12000 case Intrinsic::riscv_sseg7_load_mask:
12001 case Intrinsic::riscv_sseg8_load_mask:
12002 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
12003
12004 case Intrinsic::riscv_sf_vc_v_x_se:
12005 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
12006 case Intrinsic::riscv_sf_vc_v_i_se:
12007 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
12008 case Intrinsic::riscv_sf_vc_v_xv_se:
12009 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
12010 case Intrinsic::riscv_sf_vc_v_iv_se:
12011 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
12012 case Intrinsic::riscv_sf_vc_v_vv_se:
12013 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
12014 case Intrinsic::riscv_sf_vc_v_fv_se:
12015 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
12016 case Intrinsic::riscv_sf_vc_v_xvv_se:
12017 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
12018 case Intrinsic::riscv_sf_vc_v_ivv_se:
12019 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
12020 case Intrinsic::riscv_sf_vc_v_vvv_se:
12021 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
12022 case Intrinsic::riscv_sf_vc_v_fvv_se:
12023 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
12024 case Intrinsic::riscv_sf_vc_v_xvw_se:
12025 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
12026 case Intrinsic::riscv_sf_vc_v_ivw_se:
12027 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
12028 case Intrinsic::riscv_sf_vc_v_vvw_se:
12029 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
12030 case Intrinsic::riscv_sf_vc_v_fvw_se:
12031 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
12032 }
12033
12034 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
12035}
12036
12037static SDValue
12039 const RISCVSubtarget &Subtarget,
12040 SelectionDAG &DAG) {
12041 bool IsStrided;
12042 switch (IntNo) {
12043 case Intrinsic::riscv_seg2_store_mask:
12044 case Intrinsic::riscv_seg3_store_mask:
12045 case Intrinsic::riscv_seg4_store_mask:
12046 case Intrinsic::riscv_seg5_store_mask:
12047 case Intrinsic::riscv_seg6_store_mask:
12048 case Intrinsic::riscv_seg7_store_mask:
12049 case Intrinsic::riscv_seg8_store_mask:
12050 IsStrided = false;
12051 break;
12052 case Intrinsic::riscv_sseg2_store_mask:
12053 case Intrinsic::riscv_sseg3_store_mask:
12054 case Intrinsic::riscv_sseg4_store_mask:
12055 case Intrinsic::riscv_sseg5_store_mask:
12056 case Intrinsic::riscv_sseg6_store_mask:
12057 case Intrinsic::riscv_sseg7_store_mask:
12058 case Intrinsic::riscv_sseg8_store_mask:
12059 IsStrided = true;
12060 break;
12061 default:
12062 llvm_unreachable("unexpected intrinsic ID");
12063 }
12064
12065 SDLoc DL(Op);
12066 static const Intrinsic::ID VssegInts[] = {
12067 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12068 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12069 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12070 Intrinsic::riscv_vsseg8_mask};
12071 static const Intrinsic::ID VsssegInts[] = {
12072 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
12073 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
12074 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
12075 Intrinsic::riscv_vssseg8_mask};
12076
12077 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
12078 // (chain, int_id, vec*, ptr, stride, mask, vl)
12079 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
12080 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
12081 MVT XLenVT = Subtarget.getXLenVT();
12082 MVT VT = Op->getOperand(2).getSimpleValueType();
12083 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
12084 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
12085 ContainerVT.getScalarSizeInBits();
12086 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
12087
12088 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
12089 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
12090 MVT MaskVT = Mask.getSimpleValueType();
12091 MVT MaskContainerVT =
12092 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
12093 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
12094
12095 SDValue IntID = DAG.getTargetConstant(
12096 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
12097 SDValue Ptr = Op->getOperand(NF + 2);
12098
12099 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
12100
12101 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12102 for (unsigned i = 0; i < NF; i++)
12103 StoredVal = DAG.getNode(
12104 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12105 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
12106 DAG, Subtarget),
12107 DAG.getTargetConstant(i, DL, MVT::i32));
12108
12110 FixedIntrinsic->getChain(),
12111 IntID,
12112 StoredVal,
12113 Ptr,
12114 Mask,
12115 VL,
12116 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
12117 // Insert the stride operand.
12118 if (IsStrided)
12119 Ops.insert(std::next(Ops.begin(), 4),
12120 Op.getOperand(Op.getNumOperands() - 3));
12121
12122 return DAG.getMemIntrinsicNode(
12123 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12124 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
12125}
12126
12127SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
12128 SelectionDAG &DAG) const {
12129 unsigned IntNo = Op.getConstantOperandVal(1);
12130 switch (IntNo) {
12131 default:
12132 break;
12133 case Intrinsic::riscv_seg2_store_mask:
12134 case Intrinsic::riscv_seg3_store_mask:
12135 case Intrinsic::riscv_seg4_store_mask:
12136 case Intrinsic::riscv_seg5_store_mask:
12137 case Intrinsic::riscv_seg6_store_mask:
12138 case Intrinsic::riscv_seg7_store_mask:
12139 case Intrinsic::riscv_seg8_store_mask:
12140 case Intrinsic::riscv_sseg2_store_mask:
12141 case Intrinsic::riscv_sseg3_store_mask:
12142 case Intrinsic::riscv_sseg4_store_mask:
12143 case Intrinsic::riscv_sseg5_store_mask:
12144 case Intrinsic::riscv_sseg6_store_mask:
12145 case Intrinsic::riscv_sseg7_store_mask:
12146 case Intrinsic::riscv_sseg8_store_mask:
12147 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
12148
12149 case Intrinsic::riscv_sf_vc_xv_se:
12150 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
12151 case Intrinsic::riscv_sf_vc_iv_se:
12152 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
12153 case Intrinsic::riscv_sf_vc_vv_se:
12154 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
12155 case Intrinsic::riscv_sf_vc_fv_se:
12156 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
12157 case Intrinsic::riscv_sf_vc_xvv_se:
12158 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
12159 case Intrinsic::riscv_sf_vc_ivv_se:
12160 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
12161 case Intrinsic::riscv_sf_vc_vvv_se:
12162 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
12163 case Intrinsic::riscv_sf_vc_fvv_se:
12164 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
12165 case Intrinsic::riscv_sf_vc_xvw_se:
12166 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
12167 case Intrinsic::riscv_sf_vc_ivw_se:
12168 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
12169 case Intrinsic::riscv_sf_vc_vvw_se:
12170 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
12171 case Intrinsic::riscv_sf_vc_fvw_se:
12172 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
12173 }
12174
12175 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
12176}
12177
12178static unsigned getRVVReductionOp(unsigned ISDOpcode) {
12179 switch (ISDOpcode) {
12180 default:
12181 llvm_unreachable("Unhandled reduction");
12182 case ISD::VP_REDUCE_ADD:
12183 case ISD::VECREDUCE_ADD:
12184 return RISCVISD::VECREDUCE_ADD_VL;
12185 case ISD::VP_REDUCE_UMAX:
12187 return RISCVISD::VECREDUCE_UMAX_VL;
12188 case ISD::VP_REDUCE_SMAX:
12190 return RISCVISD::VECREDUCE_SMAX_VL;
12191 case ISD::VP_REDUCE_UMIN:
12193 return RISCVISD::VECREDUCE_UMIN_VL;
12194 case ISD::VP_REDUCE_SMIN:
12196 return RISCVISD::VECREDUCE_SMIN_VL;
12197 case ISD::VP_REDUCE_AND:
12198 case ISD::VECREDUCE_AND:
12199 return RISCVISD::VECREDUCE_AND_VL;
12200 case ISD::VP_REDUCE_OR:
12201 case ISD::VECREDUCE_OR:
12202 return RISCVISD::VECREDUCE_OR_VL;
12203 case ISD::VP_REDUCE_XOR:
12204 case ISD::VECREDUCE_XOR:
12205 return RISCVISD::VECREDUCE_XOR_VL;
12206 case ISD::VP_REDUCE_FADD:
12207 return RISCVISD::VECREDUCE_FADD_VL;
12208 case ISD::VP_REDUCE_SEQ_FADD:
12209 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
12210 case ISD::VP_REDUCE_FMAX:
12211 case ISD::VP_REDUCE_FMAXIMUM:
12212 return RISCVISD::VECREDUCE_FMAX_VL;
12213 case ISD::VP_REDUCE_FMIN:
12214 case ISD::VP_REDUCE_FMINIMUM:
12215 return RISCVISD::VECREDUCE_FMIN_VL;
12216 }
12217
12218}
12219
// Lower a reduction of a mask vector (vXi1) via vcpop.m, since RVV has no
// dedicated i1 reduction instructions:
//   AND -> vcpop(~x) == 0,  OR -> vcpop(x) != 0,  XOR -> (vcpop(x) & 1) != 0.
// For VP reductions the start value is folded in at the end.
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  // VP reductions carry the start value as operand 0, the vector as 1.
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    // VP reductions supply their own mask and EVL operands.
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  ISD::CondCode CC;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    // Use the VL-predicated mask-xor when VL matters (VP or a fixed-length
    // source); a plain XOR suffices for a full scalable vector.
    if (IsVP || VecVT.isFixedLengthVector())
      Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    else
      Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    break;
  }
  }

  // Compare the (possibly masked) population count against zero and narrow
  // the XLen-wide comparison result down to the i1 result type.
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
12300
12301static bool isNonZeroAVL(SDValue AVL) {
12302 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
12303 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
12304 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
12305 (ImmAVL && ImmAVL->getZExtValue() >= 1);
12306}
12307
12308/// Helper to lower a reduction sequence of the form:
12309/// scalar = reduce_op vec, scalar_start
12310static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
12311 SDValue StartValue, SDValue Vec, SDValue Mask,
12312 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
12313 const RISCVSubtarget &Subtarget) {
12314 const MVT VecVT = Vec.getSimpleValueType();
12315 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
12316 const MVT XLenVT = Subtarget.getXLenVT();
12317 const bool NonZeroAVL = isNonZeroAVL(VL);
12318
12319 // The reduction needs an LMUL1 input; do the splat at either LMUL1
12320 // or the original VT if fractional.
12321 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
12322 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
12323 // prove it is non-zero. For the AVL=0 case, we need the scalar to
12324 // be the result of the reduction operation.
12325 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
12326 SDValue InitialValue =
12327 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
12328 if (M1VT != InnerVT)
12329 InitialValue =
12330 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
12331 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
12333 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
12334 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
12335 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
12336}
12337
12338SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
12339 SelectionDAG &DAG) const {
12340 SDLoc DL(Op);
12341 SDValue Vec = Op.getOperand(0);
12342 EVT VecEVT = Vec.getValueType();
12343
12344 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
12345
12346 // Due to ordering in legalize types we may have a vector type that needs to
12347 // be split. Do that manually so we can get down to a legal type.
12348 while (getTypeAction(*DAG.getContext(), VecEVT) ==
12350 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12351 VecEVT = Lo.getValueType();
12352 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
12353 }
12354
12355 // TODO: The type may need to be widened rather than split. Or widened before
12356 // it can be split.
12357 if (!isTypeLegal(VecEVT))
12358 return SDValue();
12359
12360 MVT VecVT = VecEVT.getSimpleVT();
12361 MVT VecEltVT = VecVT.getVectorElementType();
12362 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
12363
12364 MVT ContainerVT = VecVT;
12365 if (VecVT.isFixedLengthVector()) {
12366 ContainerVT = getContainerForFixedLengthVector(VecVT);
12367 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12368 }
12369
12370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12371
12372 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
12373 switch (BaseOpc) {
12374 case ISD::AND:
12375 case ISD::OR:
12376 case ISD::UMAX:
12377 case ISD::UMIN:
12378 case ISD::SMAX:
12379 case ISD::SMIN:
12380 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
12381 }
12382 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
12383 Mask, VL, DL, DAG, Subtarget);
12384}
12385
12386// Given a reduction op, this function returns the matching reduction opcode,
12387// the vector SDValue and the scalar SDValue required to lower this to a
12388// RISCVISD node.
12389static std::tuple<unsigned, SDValue, SDValue>
12391 const RISCVSubtarget &Subtarget) {
12392 SDLoc DL(Op);
12393 auto Flags = Op->getFlags();
12394 unsigned Opcode = Op.getOpcode();
12395 switch (Opcode) {
12396 default:
12397 llvm_unreachable("Unhandled reduction");
12398 case ISD::VECREDUCE_FADD: {
12399 // Use positive zero if we can. It is cheaper to materialize.
12400 SDValue Zero =
12401 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
12402 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
12403 }
12405 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
12406 Op.getOperand(0));
12410 case ISD::VECREDUCE_FMAX: {
12411 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
12412 unsigned RVVOpc =
12413 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
12414 ? RISCVISD::VECREDUCE_FMIN_VL
12415 : RISCVISD::VECREDUCE_FMAX_VL;
12416 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
12417 }
12418 }
12419}
12420
12421SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
12422 SelectionDAG &DAG) const {
12423 SDLoc DL(Op);
12424 MVT VecEltVT = Op.getSimpleValueType();
12425
12426 unsigned RVVOpcode;
12427 SDValue VectorVal, ScalarVal;
12428 std::tie(RVVOpcode, VectorVal, ScalarVal) =
12429 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
12430 MVT VecVT = VectorVal.getSimpleValueType();
12431
12432 MVT ContainerVT = VecVT;
12433 if (VecVT.isFixedLengthVector()) {
12434 ContainerVT = getContainerForFixedLengthVector(VecVT);
12435 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
12436 }
12437
12438 MVT ResVT = Op.getSimpleValueType();
12439 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12440 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
12441 VL, DL, DAG, Subtarget);
12442 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
12443 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
12444 return Res;
12445
12446 if (Op->getFlags().hasNoNaNs())
12447 return Res;
12448
12449 // Force output to NaN if any element is Nan.
12450 SDValue IsNan =
12451 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
12452 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
12453 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
12454 MVT XLenVT = Subtarget.getXLenVT();
12455 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
12456 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
12457 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12458 return DAG.getSelect(
12459 DL, ResVT, NoNaNs, Res,
12460 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12461}
12462
// Lower VP_REDUCE_* nodes via lowerReductionSeq; for VP_REDUCE_FMINIMUM /
// VP_REDUCE_FMAXIMUM additionally force the result to NaN when any active
// element or the start value is NaN.
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Start = Op.getOperand(0);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVReductionOp(Opc);

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // VP reductions: operand 0 = start, 1 = vector, 2 = mask, 3 = EVL.
  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  SDValue Res =
      lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                        Vec, Mask, VL, DL, DAG, Subtarget);
  if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
      Op->getFlags().hasNoNaNs())
    return Res;

  // Propagate NaNs.
  MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
  // Check if any of the elements in Vec is NaN. (A value compares unequal to
  // itself exactly when it is NaN.)
  SDValue IsNaN = DAG.getNode(
      RISCVISD::SETCC_VL, DL, PredVT,
      {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
  SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
  // Check if the start value is NaN.
  SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
  VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
  SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
                                DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
  MVT ResVT = Res.getSimpleValueType();
  // Select the computed reduction when no NaNs were seen, otherwise a
  // canonical quiet NaN of the result type.
  return DAG.getSelect(
      DL, ResVT, NoNaNs, Res,
      DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
}
12511
12512SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
12513 SelectionDAG &DAG) const {
12514 SDValue Vec = Op.getOperand(0);
12515 SDValue SubVec = Op.getOperand(1);
12516 MVT VecVT = Vec.getSimpleValueType();
12517 MVT SubVecVT = SubVec.getSimpleValueType();
12518
12519 SDLoc DL(Op);
12520 MVT XLenVT = Subtarget.getXLenVT();
12521 unsigned OrigIdx = Op.getConstantOperandVal(2);
12522 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12523
12524 if (OrigIdx == 0 && Vec.isUndef())
12525 return Op;
12526
12527 // We don't have the ability to slide mask vectors up indexed by their i1
12528 // elements; the smallest we can do is i8. Often we are able to bitcast to
12529 // equivalent i8 vectors. Note that when inserting a fixed-length vector
12530 // into a scalable one, we might not necessarily have enough scalable
12531 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
12532 if (SubVecVT.getVectorElementType() == MVT::i1) {
12533 if (VecVT.getVectorMinNumElements() >= 8 &&
12534 SubVecVT.getVectorMinNumElements() >= 8) {
12535 assert(OrigIdx % 8 == 0 && "Invalid index");
12536 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12537 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12538 "Unexpected mask vector lowering");
12539 OrigIdx /= 8;
12540 SubVecVT =
12541 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12542 SubVecVT.isScalableVector());
12543 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12544 VecVT.isScalableVector());
12545 Vec = DAG.getBitcast(VecVT, Vec);
12546 SubVec = DAG.getBitcast(SubVecVT, SubVec);
12547 } else {
12548 // We can't slide this mask vector up indexed by its i1 elements.
12549 // This poses a problem when we wish to insert a scalable vector which
12550 // can't be re-expressed as a larger type. Just choose the slow path and
12551 // extend to a larger type, then truncate back down.
12552 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12553 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12554 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12555 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
12556 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
12557 Op.getOperand(2));
12558 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
12559 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
12560 }
12561 }
12562
12563 // If the subvector vector is a fixed-length type and we don't know VLEN
12564 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12565 // don't know which register of a LMUL group contains the specific subvector
12566 // as we only know the minimum register size. Therefore we must slide the
12567 // vector group up the full amount.
12568 const auto VLen = Subtarget.getRealVLen();
12569 if (SubVecVT.isFixedLengthVector() && !VLen) {
12570 MVT ContainerVT = VecVT;
12571 if (VecVT.isFixedLengthVector()) {
12572 ContainerVT = getContainerForFixedLengthVector(VecVT);
12573 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12574 }
12575
12576 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
12577
12578 SDValue Mask =
12579 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12580 // Set the vector length to only the number of elements we care about. Note
12581 // that for slideup this includes the offset.
12582 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
12583 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
12584
12585 // Use tail agnostic policy if we're inserting over Vec's tail.
12587 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
12589
12590 // If we're inserting into the lowest elements, use a tail undisturbed
12591 // vmv.v.v.
12592 if (OrigIdx == 0) {
12593 SubVec =
12594 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
12595 } else {
12596 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12597 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
12598 SlideupAmt, Mask, VL, Policy);
12599 }
12600
12601 if (VecVT.isFixedLengthVector())
12602 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12603 return DAG.getBitcast(Op.getValueType(), SubVec);
12604 }
12605
12606 MVT ContainerVecVT = VecVT;
12607 if (VecVT.isFixedLengthVector()) {
12608 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
12609 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
12610 }
12611
12612 MVT ContainerSubVecVT = SubVecVT;
12613 if (SubVecVT.isFixedLengthVector()) {
12614 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12615 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
12616 }
12617
12618 unsigned SubRegIdx;
12619 ElementCount RemIdx;
12620 // insert_subvector scales the index by vscale if the subvector is scalable,
12621 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12622 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12623 if (SubVecVT.isFixedLengthVector()) {
12624 assert(VLen);
12625 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12626 auto Decompose =
12628 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12629 SubRegIdx = Decompose.first;
12630 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12631 (OrigIdx % Vscale));
12632 } else {
12633 auto Decompose =
12635 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
12636 SubRegIdx = Decompose.first;
12637 RemIdx = ElementCount::getScalable(Decompose.second);
12638 }
12639
12640 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
12642 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
12643 bool ExactlyVecRegSized =
12644 Subtarget.expandVScale(SubVecVT.getSizeInBits())
12645 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
12646
12647 // 1. If the Idx has been completely eliminated and this subvector's size is
12648 // a vector register or a multiple thereof, or the surrounding elements are
12649 // undef, then this is a subvector insert which naturally aligns to a vector
12650 // register. These can easily be handled using subregister manipulation.
12651 // 2. If the subvector isn't an exact multiple of a valid register group size,
12652 // then the insertion must preserve the undisturbed elements of the register.
12653 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
12654 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
12655 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
12656 // of that LMUL=1 type back into the larger vector (resolving to another
12657 // subregister operation). See below for how our VSLIDEUP works. We go via a
12658 // LMUL=1 type to avoid allocating a large register group to hold our
12659 // subvector.
12660 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
12661 if (SubVecVT.isFixedLengthVector()) {
12662 // We may get NoSubRegister if inserting at index 0 and the subvec
12663 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
12664 if (SubRegIdx == RISCV::NoSubRegister) {
12665 assert(OrigIdx == 0);
12666 return Op;
12667 }
12668
12669 // Use a insert_subvector that will resolve to an insert subreg.
12670 assert(VLen);
12671 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12672 SDValue Insert =
12673 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
12674 if (VecVT.isFixedLengthVector())
12675 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
12676 return Insert;
12677 }
12678 return Op;
12679 }
12680
12681 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
12682 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
12683 // (in our case undisturbed). This means we can set up a subvector insertion
12684 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
12685 // size of the subvector.
12686 MVT InterSubVT = ContainerVecVT;
12687 SDValue AlignedExtract = Vec;
12688 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
12689 if (SubVecVT.isFixedLengthVector()) {
12690 assert(VLen);
12691 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
12692 }
12693 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
12694 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
12695 // Extract a subvector equal to the nearest full vector register type. This
12696 // should resolve to a EXTRACT_SUBREG instruction.
12697 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
12698 }
12699
12700 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
12701
12702 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
12703
12704 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
12705 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
12706
12707 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
12709 if (Subtarget.expandVScale(EndIndex) ==
12710 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
12712
12713 // If we're inserting into the lowest elements, use a tail undisturbed
12714 // vmv.v.v.
12715 if (RemIdx.isZero()) {
12716 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
12717 SubVec, VL);
12718 } else {
12719 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12720
12721 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
12722 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
12723
12724 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
12725 SlideupAmt, Mask, VL, Policy);
12726 }
12727
12728 // If required, insert this subvector back into the correct vector register.
12729 // This should resolve to an INSERT_SUBREG instruction.
12730 if (ContainerVecVT.bitsGT(InterSubVT))
12731 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
12732
12733 if (VecVT.isFixedLengthVector())
12734 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12735
12736 // We might have bitcast from a mask type: cast back to the original type if
12737 // required.
12738 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
12739}
12740
12741SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
12742 SelectionDAG &DAG) const {
12743 SDValue Vec = Op.getOperand(0);
12744 MVT SubVecVT = Op.getSimpleValueType();
12745 MVT VecVT = Vec.getSimpleValueType();
12746
12747 SDLoc DL(Op);
12748 MVT XLenVT = Subtarget.getXLenVT();
12749 unsigned OrigIdx = Op.getConstantOperandVal(1);
12750 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12751
12752 // With an index of 0 this is a cast-like subvector, which can be performed
12753 // with subregister operations.
12754 if (OrigIdx == 0)
12755 return Op;
12756
12757 // We don't have the ability to slide mask vectors down indexed by their i1
12758 // elements; the smallest we can do is i8. Often we are able to bitcast to
12759 // equivalent i8 vectors. Note that when extracting a fixed-length vector
12760 // from a scalable one, we might not necessarily have enough scalable
12761 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
12762 if (SubVecVT.getVectorElementType() == MVT::i1) {
12763 if (VecVT.getVectorMinNumElements() >= 8 &&
12764 SubVecVT.getVectorMinNumElements() >= 8) {
12765 assert(OrigIdx % 8 == 0 && "Invalid index");
12766 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12767 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12768 "Unexpected mask vector lowering");
12769 OrigIdx /= 8;
12770 SubVecVT =
12771 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12772 SubVecVT.isScalableVector());
12773 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12774 VecVT.isScalableVector());
12775 Vec = DAG.getBitcast(VecVT, Vec);
12776 } else {
12777 // We can't slide this mask vector down, indexed by its i1 elements.
12778 // This poses a problem when we wish to extract a scalable vector which
12779 // can't be re-expressed as a larger type. Just choose the slow path and
12780 // extend to a larger type, then truncate back down.
12781 // TODO: We could probably improve this when extracting certain fixed
12782 // from fixed, where we can extract as i8 and shift the correct element
12783 // right to reach the desired subvector?
12784 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12785 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12786 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12787 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
12788 Op.getOperand(1));
12789 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
12790 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
12791 }
12792 }
12793
12794 const auto VLen = Subtarget.getRealVLen();
12795
12796 // If the subvector vector is a fixed-length type and we don't know VLEN
12797 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12798 // don't know which register of a LMUL group contains the specific subvector
12799 // as we only know the minimum register size. Therefore we must slide the
12800 // vector group down the full amount.
12801 if (SubVecVT.isFixedLengthVector() && !VLen) {
12802 MVT ContainerVT = VecVT;
12803 if (VecVT.isFixedLengthVector()) {
12804 ContainerVT = getContainerForFixedLengthVector(VecVT);
12805 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12806 }
12807
12808 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12809 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12810 if (auto ShrunkVT =
12811 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12812 ContainerVT = *ShrunkVT;
12813 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12814 }
12815
12816 SDValue Mask =
12817 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12818 // Set the vector length to only the number of elements we care about. This
12819 // avoids sliding down elements we're going to discard straight away.
12820 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12821 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12822 SDValue Slidedown =
12823 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12824 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12825 // Now we can use a cast-like subvector extract to get the result.
12826 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12827 return DAG.getBitcast(Op.getValueType(), Slidedown);
12828 }
12829
12830 if (VecVT.isFixedLengthVector()) {
12831 VecVT = getContainerForFixedLengthVector(VecVT);
12832 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12833 }
12834
12835 MVT ContainerSubVecVT = SubVecVT;
12836 if (SubVecVT.isFixedLengthVector())
12837 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12838
12839 unsigned SubRegIdx;
12840 ElementCount RemIdx;
12841 // extract_subvector scales the index by vscale if the subvector is scalable,
12842 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12843 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12844 if (SubVecVT.isFixedLengthVector()) {
12845 assert(VLen);
12846 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12847 auto Decompose =
12849 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12850 SubRegIdx = Decompose.first;
12851 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12852 (OrigIdx % Vscale));
12853 } else {
12854 auto Decompose =
12856 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12857 SubRegIdx = Decompose.first;
12858 RemIdx = ElementCount::getScalable(Decompose.second);
12859 }
12860
12861 // If the Idx has been completely eliminated then this is a subvector extract
12862 // which naturally aligns to a vector register. These can easily be handled
12863 // using subregister manipulation. We use an extract_subvector that will
12864 // resolve to an extract subreg.
12865 if (RemIdx.isZero()) {
12866 if (SubVecVT.isFixedLengthVector()) {
12867 assert(VLen);
12868 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12869 Vec =
12870 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12871 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12872 }
12873 return Op;
12874 }
12875
12876 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12877 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12878 // divide exactly.
12879 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12880 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12881
12882 // If the vector type is an LMUL-group type, extract a subvector equal to the
12883 // nearest full vector register type.
12884 MVT InterSubVT = VecVT;
12885 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12886 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12887 // we should have successfully decomposed the extract into a subregister.
12888 // We use an extract_subvector that will resolve to a subreg extract.
12889 assert(SubRegIdx != RISCV::NoSubRegister);
12890 (void)SubRegIdx;
12891 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12892 if (SubVecVT.isFixedLengthVector()) {
12893 assert(VLen);
12894 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12895 }
12896 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12897 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12898 }
12899
12900 // Slide this vector register down by the desired number of elements in order
12901 // to place the desired subvector starting at element 0.
12902 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12903 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12904 if (SubVecVT.isFixedLengthVector())
12905 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12906 SDValue Slidedown =
12907 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12908 Vec, SlidedownAmt, Mask, VL);
12909
12910 // Now the vector is in the right position, extract our final subvector. This
12911 // should resolve to a COPY.
12912 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12913
12914 // We might have bitcast from a mask type: cast back to the original type if
12915 // required.
12916 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12917}
12918
12919// Widen a vector's operands to i8, then truncate its results back to the
12920// original type, typically i1. All operand and result types must be the same.
12922 SelectionDAG &DAG) {
12923 MVT VT = N.getSimpleValueType();
12924 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12926 for (SDValue Op : N->ops()) {
12927 assert(Op.getSimpleValueType() == VT &&
12928 "Operands and result must be same type");
12929 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12930 }
12931
12932 unsigned NumVals = N->getNumValues();
12933
12935 NumVals,
12936 N.getValueType().changeVectorElementType(*DAG.getContext(), MVT::i8)));
12937 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12938 SmallVector<SDValue, 4> TruncVals;
12939 for (unsigned I = 0; I < NumVals; I++) {
12940 TruncVals.push_back(
12941 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12942 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12943 }
12944
12945 if (TruncVals.size() > 1)
12946 return DAG.getMergeValues(TruncVals, DL);
12947 return TruncVals.front();
12948}
12949
12950SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12951 SelectionDAG &DAG) const {
12952 SDLoc DL(Op);
12953 MVT VecVT = Op.getSimpleValueType();
12954
12955 const unsigned Factor = Op->getNumValues();
12956 assert(Factor <= 8);
12957
12958 // 1 bit element vectors need to be widened to e8
12959 if (VecVT.getVectorElementType() == MVT::i1)
12960 return widenVectorOpsToi8(Op, DL, DAG);
12961
12962 // Convert to scalable vectors first.
12963 if (VecVT.isFixedLengthVector()) {
12964 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12966 for (unsigned i = 0U; i < Factor; ++i)
12967 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12968 Subtarget);
12969
12970 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12971 SDValue NewDeinterleave =
12973
12974 SmallVector<SDValue, 8> Res(Factor);
12975 for (unsigned i = 0U; i < Factor; ++i)
12976 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12977 DAG, Subtarget);
12978 return DAG.getMergeValues(Res, DL);
12979 }
12980
12981 // If concatenating would exceed LMUL=8, we need to split.
12982 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12983 (8 * RISCV::RVVBitsPerBlock)) {
12984 SmallVector<SDValue, 8> Ops(Factor * 2);
12985 for (unsigned i = 0; i != Factor; ++i) {
12986 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12987 Ops[i * 2] = OpLo;
12988 Ops[i * 2 + 1] = OpHi;
12989 }
12990
12991 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12992
12994 ArrayRef(Ops).slice(0, Factor));
12996 ArrayRef(Ops).slice(Factor, Factor));
12997
12998 SmallVector<SDValue, 8> Res(Factor);
12999 for (unsigned i = 0; i != Factor; ++i)
13000 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
13001 Hi.getValue(i));
13002
13003 return DAG.getMergeValues(Res, DL);
13004 }
13005
13006 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
13007 MVT VT = Op->getSimpleValueType(0);
13008 SDValue V1 = Op->getOperand(0);
13009 SDValue V2 = Op->getOperand(1);
13010
13011 // For fractional LMUL, check if we can use a higher LMUL
13012 // instruction to avoid a vslidedown.
13013 if (SDValue Src = foldConcatVector(V1, V2);
13014 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
13015 EVT NewVT = VT.getDoubleNumVectorElementsVT();
13016 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
13017 // Freeze the source so we can increase its use count.
13018 Src = DAG.getFreeze(Src);
13019 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
13020 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
13021 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
13022 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
13023 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
13024 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
13025 return DAG.getMergeValues({Even, Odd}, DL);
13026 }
13027
13028 // Freeze the sources so we can increase their use count.
13029 V1 = DAG.getFreeze(V1);
13030 V2 = DAG.getFreeze(V2);
13031 SDValue Even =
13032 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
13033 SDValue Odd =
13034 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
13035 return DAG.getMergeValues({Even, Odd}, DL);
13036 }
13037
13038 SmallVector<SDValue, 8> Ops(Op->op_values());
13039
13040 // Concatenate the vectors as one vector to deinterleave
13041 MVT ConcatVT =
13044 PowerOf2Ceil(Factor)));
13045 if (Ops.size() < PowerOf2Ceil(Factor))
13046 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
13047 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
13048
13049 if (Factor == 2) {
13050 // We can deinterleave through vnsrl.wi if the element type is smaller than
13051 // ELEN
13052 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
13053 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
13054 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
13055 return DAG.getMergeValues({Even, Odd}, DL);
13056 }
13057
13058 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
13059 // possibly mask vector, then extract the required subvector. Doing this
13060 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
13061 // creation to be rematerialized during register allocation to reduce
13062 // register pressure if needed.
13063
13064 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
13065
13066 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
13067 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
13068 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
13069
13070 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
13071 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
13072 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
13073
13074 // vcompress the even and odd elements into two separate vectors
13075 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
13076 EvenMask, DAG.getUNDEF(ConcatVT));
13077 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
13078 OddMask, DAG.getUNDEF(ConcatVT));
13079
13080 // Extract the result half of the gather for even and odd
13081 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
13082 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
13083
13084 return DAG.getMergeValues({Even, Odd}, DL);
13085 }
13086
13087 // Store with unit-stride store and load it back with segmented load.
13088 MVT XLenVT = Subtarget.getXLenVT();
13089 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
13090 SDValue Passthru = DAG.getUNDEF(ConcatVT);
13091
13092 // Allocate a stack slot.
13093 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13095 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
13096 auto &MF = DAG.getMachineFunction();
13097 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13098 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
13099
13100 SDValue StoreOps[] = {DAG.getEntryNode(),
13101 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
13102 Concat, StackPtr, VL};
13103
13104 SDValue Chain = DAG.getMemIntrinsicNode(
13105 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
13106 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
13108
13109 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
13110 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
13111 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
13112 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
13113 Intrinsic::riscv_vlseg8_mask};
13114
13115 SDValue LoadOps[] = {
13116 Chain,
13117 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
13118 Passthru,
13119 StackPtr,
13120 Mask,
13121 VL,
13124 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
13125
13126 unsigned Sz =
13127 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
13128 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
13129
13131 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
13132 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
13134
13135 SmallVector<SDValue, 8> Res(Factor);
13136
13137 for (unsigned i = 0U; i < Factor; ++i)
13138 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
13139 DAG.getTargetConstant(i, DL, MVT::i32));
13140
13141 return DAG.getMergeValues(Res, DL);
13142}
13143
13144SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
13145 SelectionDAG &DAG) const {
13146 SDLoc DL(Op);
13147 MVT VecVT = Op.getSimpleValueType();
13148
13149 const unsigned Factor = Op.getNumOperands();
13150 assert(Factor <= 8);
13151
13152 // i1 vectors need to be widened to i8
13153 if (VecVT.getVectorElementType() == MVT::i1)
13154 return widenVectorOpsToi8(Op, DL, DAG);
13155
13156 // Convert to scalable vectors first.
13157 if (VecVT.isFixedLengthVector()) {
13158 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
13160 for (unsigned i = 0U; i < Factor; ++i)
13161 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
13162 Subtarget);
13163
13164 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
13165 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
13166
13167 SmallVector<SDValue, 8> Res(Factor);
13168 for (unsigned i = 0U; i < Factor; ++i)
13169 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
13170 Subtarget);
13171 return DAG.getMergeValues(Res, DL);
13172 }
13173
13174 MVT XLenVT = Subtarget.getXLenVT();
13175 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
13176
13177 // If the VT is larger than LMUL=8, we need to split and reassemble.
13178 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
13179 (8 * RISCV::RVVBitsPerBlock)) {
13180 SmallVector<SDValue, 8> Ops(Factor * 2);
13181 for (unsigned i = 0; i != Factor; ++i) {
13182 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
13183 Ops[i] = OpLo;
13184 Ops[i + Factor] = OpHi;
13185 }
13186
13187 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
13188
13189 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
13190 ArrayRef(Ops).take_front(Factor)),
13192 ArrayRef(Ops).drop_front(Factor))};
13193
13194 SmallVector<SDValue, 8> Concats(Factor);
13195 for (unsigned i = 0; i != Factor; ++i) {
13196 unsigned IdxLo = 2 * i;
13197 unsigned IdxHi = 2 * i + 1;
13198 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
13199 Res[IdxLo / Factor].getValue(IdxLo % Factor),
13200 Res[IdxHi / Factor].getValue(IdxHi % Factor));
13201 }
13202
13203 return DAG.getMergeValues(Concats, DL);
13204 }
13205
13206 SDValue Interleaved;
13207
13208 // Spill to the stack using a segment store for simplicity.
13209 if (Factor != 2) {
13210 EVT MemVT =
13212 VecVT.getVectorElementCount() * Factor);
13213
13214 // Allocate a stack slot.
13215 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13217 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
13218 EVT PtrVT = StackPtr.getValueType();
13219 auto &MF = DAG.getMachineFunction();
13220 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13221 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
13222
13223 static const Intrinsic::ID IntrIds[] = {
13224 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
13225 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
13226 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
13227 Intrinsic::riscv_vsseg8_mask,
13228 };
13229
13230 unsigned Sz =
13231 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
13232 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
13233
13234 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
13235 for (unsigned i = 0; i < Factor; i++)
13236 StoredVal =
13237 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
13238 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
13239
13240 SDValue Ops[] = {DAG.getEntryNode(),
13241 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
13242 StoredVal,
13243 StackPtr,
13244 Mask,
13245 VL,
13247 DL, XLenVT)};
13248
13249 SDValue Chain = DAG.getMemIntrinsicNode(
13250 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
13251 VecVT.getVectorElementType(), PtrInfo, Alignment,
13253
13254 SmallVector<SDValue, 8> Loads(Factor);
13255
13256 SDValue Increment = DAG.getTypeSize(DL, PtrVT, VecVT.getStoreSize());
13257 for (unsigned i = 0; i != Factor; ++i) {
13258 if (i != 0)
13259 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
13260
13261 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
13262 }
13263
13264 return DAG.getMergeValues(Loads, DL);
13265 }
13266
13267 // Use ri.vzip2{a,b} if available
13268 // TODO: Figure out the best lowering for the spread variants
13269 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
13270 !Op.getOperand(1).isUndef()) {
13271 // Freeze the sources so we can increase their use count.
13272 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
13273 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
13274 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
13275 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
13276 return DAG.getMergeValues({Lo, Hi}, DL);
13277 }
13278
13279 // If the element type is smaller than ELEN, then we can interleave with
13280 // vwaddu.vv and vwmaccu.vx
13281 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
13282 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
13283 DAG, Subtarget);
13284 } else {
13285 // Otherwise, fallback to using vrgathere16.vv
13286 MVT ConcatVT =
13289 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
13290 Op.getOperand(0), Op.getOperand(1));
13291
13292 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
13293
13294 // 0 1 2 3 4 5 6 7 ...
13295 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
13296
13297 // 1 1 1 1 1 1 1 1 ...
13298 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
13299
13300 // 1 0 1 0 1 0 1 0 ...
13301 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
13302 OddMask = DAG.getSetCC(
13303 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
13304 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
13306
13307 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
13308
13309 // Build up the index vector for interleaving the concatenated vector
13310 // 0 0 1 1 2 2 3 3 ...
13311 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
13312 // 0 n 1 n+1 2 n+2 3 n+3 ...
13313 Idx =
13314 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
13315
13316 // Then perform the interleave
13317 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
13318 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
13319 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
13320 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
13321 }
13322
13323 // Extract the two halves from the interleaved result
13324 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
13325 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
13326 VecVT.getVectorMinNumElements());
13327
13328 return DAG.getMergeValues({Lo, Hi}, DL);
13329}
13330
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
13333SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
13334 SelectionDAG &DAG) const {
13335 SDLoc DL(Op);
13336 MVT VT = Op.getSimpleValueType();
13337 assert(VT.isScalableVector() && "Expected scalable vector");
13338 MVT XLenVT = Subtarget.getXLenVT();
13339 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13340 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
13341 uint64_t StepValImm = Op.getConstantOperandVal(0);
13342 if (StepValImm != 1) {
13343 if (isPowerOf2_64(StepValImm)) {
13344 SDValue StepVal =
13345 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13346 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
13347 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
13348 } else {
13349 SDValue StepVal = lowerScalarSplat(
13350 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
13351 VL, VT, DL, DAG, Subtarget);
13352 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
13353 }
13354 }
13355 return StepVec;
13356}
13357
13358// Implement vector_reverse using vrgather.vv with indices determined by
13359// subtracting the id of each element from (VLMAX-1). This will convert
13360// the indices like so:
13361// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
13362// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  // i1 masks have no gather instruction: widen to i8, reverse there, then
  // compare back down to a mask.
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getSetCC(DL, VecVT, Op2,
                        DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
  }

  // Fixed-length vectors are processed inside their scalable container type.
  MVT ContainerVT = VecVT;
  SDValue Vec = Op.getOperand(0);
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // On some uarchs vrgather.vv will read from every input register for each
  // output register, regardless of the indices. However to reverse a vector
  // each output register only needs to read from one register. So decompose it
  // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
  // O(LMUL^2).
  //
  // vsetvli a1, zero, e64, m4, ta, ma
  // vrgatherei16.vv v12, v8, v16
  // ->
  // vsetvli a1, zero, e64, m1, ta, ma
  // vrgather.vv v15, v8, v16
  // vrgather.vv v14, v9, v16
  // vrgather.vv v13, v10, v16
  // vrgather.vv v12, v11, v16
  if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
      ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
    // Recursively reverse each half, then swap the halves.
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getValueType(), Lo);
    Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getValueType(), Hi);
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);

    // Fixed length vectors might not fit exactly into their container, and so
    // leave a gap in the front of the vector after being reversed. Slide this
    // away.
    //
    // x x x x 3 2 1 0 <- v4i16 @ vlen=128
    // 0 1 2 3 x x x x <- reverse
    // x x x x 0 1 2 3 <- vslidedown.vx
    if (VecVT.isFixedLengthVector()) {
      // Slide amount = container element count - fixed element count.
      SDValue Offset = DAG.getNode(
          ISD::SUB, DL, XLenVT,
          DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
          DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
      Concat =
          getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
      Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
    }
    return Concat;
  }

  unsigned EltSize = ContainerVT.getScalarSizeInBits();
  unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      VecVT.isFixedLengthVector()
          ? VecVT.getVectorNumElements()
          : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  // At LMUL > 1, do the index computation in 16 bits to reduce register
  // pressure.
  if (IntVT.getScalarType().bitsGT(MVT::i16) &&
      IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
    assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IntVT = IntVT.changeVectorElementType(MVT::i16);
  }

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(
      ISD::SUB, DL, XLenVT,
      DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
      DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  // Gather indices: (VLMAX-1) - vid, i.e. VLMAX-1, VLMAX-2, ..., 1, 0.
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
                               DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VecVT.isFixedLengthVector())
    Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
  return Gather;
}
13493
13494SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
13495 SelectionDAG &DAG) const {
13496 SDLoc DL(Op);
13497 SDValue V1 = Op.getOperand(0);
13498 SDValue V2 = Op.getOperand(1);
13499 SDValue Offset = Op.getOperand(2);
13500 MVT XLenVT = Subtarget.getXLenVT();
13501 MVT VecVT = Op.getSimpleValueType();
13502
13503 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
13504
13505 SDValue DownOffset, UpOffset;
13506 if (Op.getOpcode() == ISD::VECTOR_SPLICE_LEFT) {
13507 // The operand is a TargetConstant, we need to rebuild it as a regular
13508 // constant.
13509 DownOffset = Offset;
13510 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13511 } else {
13512 // The operand is a TargetConstant, we need to rebuild it as a regular
13513 // constant rather than negating the original operand.
13514 UpOffset = Offset;
13515 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13516 }
13517
13518 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
13519
13520 SDValue SlideDown = getVSlidedown(
13521 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
13522 Subtarget.hasVLDependentLatency() ? UpOffset
13523 : DAG.getRegister(RISCV::X0, XLenVT));
13524 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
13525 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
13527}
13528
13529SDValue
13530RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
13531 SelectionDAG &DAG) const {
13532 SDLoc DL(Op);
13533 auto *Load = cast<LoadSDNode>(Op);
13534
13536 Load->getMemoryVT(),
13537 *Load->getMemOperand()) &&
13538 "Expecting a correctly-aligned load");
13539
13540 MVT VT = Op.getSimpleValueType();
13541 MVT XLenVT = Subtarget.getXLenVT();
13542 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13543
13544 // If we know the exact VLEN and our fixed length vector completely fills
13545 // the container, use a whole register load instead.
13546 const auto [MinVLMAX, MaxVLMAX] =
13547 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13548 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13549 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13550 MachineMemOperand *MMO = Load->getMemOperand();
13551 SDValue NewLoad =
13552 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
13553 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
13554 MMO->getAAInfo(), MMO->getRanges());
13555 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13556 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13557 }
13558
13559 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13560
13561 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13562 SDValue IntID = DAG.getTargetConstant(
13563 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
13564 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
13565 if (!IsMaskOp)
13566 Ops.push_back(DAG.getUNDEF(ContainerVT));
13567 Ops.push_back(Load->getBasePtr());
13568 Ops.push_back(VL);
13569 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13570 SDValue NewLoad =
13572 Load->getMemoryVT(), Load->getMemOperand());
13573
13574 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13575 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13576}
13577
13578SDValue
13579RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
13580 SelectionDAG &DAG) const {
13581 SDLoc DL(Op);
13582 auto *Store = cast<StoreSDNode>(Op);
13583
13585 Store->getMemoryVT(),
13586 *Store->getMemOperand()) &&
13587 "Expecting a correctly-aligned store");
13588
13589 SDValue StoreVal = Store->getValue();
13590 MVT VT = StoreVal.getSimpleValueType();
13591 MVT XLenVT = Subtarget.getXLenVT();
13592
13593 // If the size less than a byte, we need to pad with zeros to make a byte.
13594 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
13595 VT = MVT::v8i1;
13596 StoreVal =
13597 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
13598 }
13599
13600 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13601
13602 SDValue NewValue =
13603 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13604
13605 // If we know the exact VLEN and our fixed length vector completely fills
13606 // the container, use a whole register store instead.
13607 const auto [MinVLMAX, MaxVLMAX] =
13608 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13609 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13610 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13611 MachineMemOperand *MMO = Store->getMemOperand();
13612 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
13613 MMO->getPointerInfo(), MMO->getBaseAlign(),
13614 MMO->getFlags(), MMO->getAAInfo());
13615 }
13616
13617 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13618
13619 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13620 SDValue IntID = DAG.getTargetConstant(
13621 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
13622 return DAG.getMemIntrinsicNode(
13623 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
13624 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
13625 Store->getMemoryVT(), Store->getMemOperand());
13626}
13627
// Lower ISD::MLOAD and ISD::VP_LOAD to the riscv_vle/riscv_vle_mask
// intrinsics. Expanding loads are implemented as a dense masked load followed
// by a viota+vrgather to scatter the loaded elements to their mask positions.
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  // VP loads carry an explicit vector length and have no passthru; masked
  // loads may carry a passthru and an expanding-load flag.
  SDValue Mask, PassThru, VL;
  bool IsExpandingLoad = false;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
    IsExpandingLoad = MLoad->isExpandingLoad();
  }

  // An all-ones mask lets us use the unmasked vle form.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // For an expanding load, only popcount(mask) elements are read from memory;
  // keep the original VL for the later expansion gather.
  SDValue ExpandingVL;
  if (!IsUnmasked && IsExpandingLoad) {
    ExpandingVL = VL;
    VL =
        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
  }

  // Expanding loads read contiguously, so they use the unmasked vle too.
  unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
                                                 : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IntID == Intrinsic::riscv_vle)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);
  if (ExpandingVL) {
    // Scatter the densely-loaded elements back to their mask positions with
    // vrgather indexed by viota(mask).
    MVT IndexVT = ContainerVT;
    if (ContainerVT.isFloatingPoint())
      IndexVT = ContainerVT.changeVectorElementTypeToInteger();

    MVT IndexEltVT = IndexVT.getVectorElementType();
    bool UseVRGATHEREI16 = false;
    // If index vector is an i8 vector and the element count exceeds 256, we
    // should change the element type of index vector to i16 to avoid
    // overflow.
    if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
      // FIXME: We need to do vector splitting manually for LMUL=8 cases.
      assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
      UseVRGATHEREI16 = true;
    }

    SDValue Iota =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
                    DAG.getTargetConstant(Intrinsic::riscv_viota, DL, XLenVT),
                    DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
    Result =
        DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
                                    : RISCVISD::VRGATHER_VV_VL,
                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
  }

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
13728
13729SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
13730 SDLoc DL(Op);
13731 MVT VT = Op->getSimpleValueType(0);
13732
13733 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
13734 EVT MemVT = VPLoadFF->getMemoryVT();
13735 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
13736 SDValue Chain = VPLoadFF->getChain();
13737 SDValue BasePtr = VPLoadFF->getBasePtr();
13738
13739 SDValue Mask = VPLoadFF->getMask();
13740 SDValue VL = VPLoadFF->getVectorLength();
13741
13742 MVT XLenVT = Subtarget.getXLenVT();
13743
13744 MVT ContainerVT = VT;
13745 if (VT.isFixedLengthVector()) {
13746 ContainerVT = getContainerForFixedLengthVector(VT);
13747 MVT MaskVT = getMaskTypeFor(ContainerVT);
13748 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13749 }
13750
13751 unsigned IntID = Intrinsic::riscv_vleff_mask;
13752 SDValue Ops[] = {
13753 Chain,
13754 DAG.getTargetConstant(IntID, DL, XLenVT),
13755 DAG.getUNDEF(ContainerVT),
13756 BasePtr,
13757 Mask,
13758 VL,
13760
13761 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
13762
13763 SDValue Result =
13764 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13765 SDValue OutVL = Result.getValue(1);
13766 Chain = Result.getValue(2);
13767
13768 if (VT.isFixedLengthVector())
13769 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13770
13771 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
13772}
13773
13774SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
13775 SelectionDAG &DAG) const {
13776 SDLoc DL(Op);
13777
13778 const auto *MemSD = cast<MemSDNode>(Op);
13779 EVT MemVT = MemSD->getMemoryVT();
13780 MachineMemOperand *MMO = MemSD->getMemOperand();
13781 SDValue Chain = MemSD->getChain();
13782 SDValue BasePtr = MemSD->getBasePtr();
13783 SDValue Val, Mask, VL;
13784
13785 bool IsCompressingStore = false;
13786 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
13787 Val = VPStore->getValue();
13788 Mask = VPStore->getMask();
13789 VL = VPStore->getVectorLength();
13790 } else {
13791 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13792 Val = MStore->getValue();
13793 Mask = MStore->getMask();
13794 IsCompressingStore = MStore->isCompressingStore();
13795 }
13796
13797 bool IsUnmasked =
13798 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13799
13800 MVT VT = Val.getSimpleValueType();
13801 MVT XLenVT = Subtarget.getXLenVT();
13802
13803 MVT ContainerVT = VT;
13804 if (VT.isFixedLengthVector()) {
13805 ContainerVT = getContainerForFixedLengthVector(VT);
13806
13807 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13808 if (!IsUnmasked || IsCompressingStore) {
13809 MVT MaskVT = getMaskTypeFor(ContainerVT);
13810 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13811 }
13812 }
13813
13814 if (!VL)
13815 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13816
13817 if (IsCompressingStore) {
13818 Val = DAG.getNode(
13819 ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13820 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13821 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13822 VL =
13823 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13824 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13825 }
13826
13827 unsigned IntID =
13828 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13829 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13830 Ops.push_back(Val);
13831 Ops.push_back(BasePtr);
13832 if (!IsUnmasked)
13833 Ops.push_back(Mask);
13834 Ops.push_back(VL);
13835
13837 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13838}
13839
13840SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13841 SelectionDAG &DAG) const {
13842 SDLoc DL(Op);
13843 SDValue Val = Op.getOperand(0);
13844 SDValue Mask = Op.getOperand(1);
13845 SDValue Passthru = Op.getOperand(2);
13846
13847 MVT VT = Val.getSimpleValueType();
13848 MVT XLenVT = Subtarget.getXLenVT();
13849 MVT ContainerVT = VT;
13850 if (VT.isFixedLengthVector()) {
13851 ContainerVT = getContainerForFixedLengthVector(VT);
13852 MVT MaskVT = getMaskTypeFor(ContainerVT);
13853 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13854 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13855 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13856 }
13857
13858 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13859 SDValue Res =
13860 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13861 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13862 Passthru, Val, Mask, VL);
13863
13864 if (VT.isFixedLengthVector())
13865 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13866
13867 return Res;
13868}
13869
// Lower STRICT_FSETCC/STRICT_FSETCCS on vectors. Condition codes that RVV's
// quiet/signaling compare instructions cannot express directly (OEQ/UNE for
// the signaling form; LT/LE for the quiet form) are expanded into compare
// combinations that preserve the required FP-exception behavior.
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
  // condition code.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      // Join both compare chains so neither is dropped.
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  // Fixed-length inputs are compared inside their scalable container type.
  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
    // active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    // x == x is false only for NaN, so these self-compares detect ordered
    // lanes without raising exceptions.
    SDValue OrderMask1 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    SDValue OrderMask2 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    Mask =
        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
    // Use Mask as the passthru operand to let the result be 0 if either of the
    // inputs is unordered.
    Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
                      DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
  } else {
    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
                                                : RISCVISD::STRICT_FSETCCS_VL;
    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
  }

  if (VT.isFixedLengthVector()) {
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
13957
13958// Lower vector ABS to smax(X, sub(0, X)).
13959SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13960 SDLoc DL(Op);
13961 MVT VT = Op.getSimpleValueType();
13962 SDValue X = Op.getOperand(0);
13963
13964 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13965 "Unexpected type for ISD::ABS");
13966
13967 MVT ContainerVT = VT;
13968 if (VT.isFixedLengthVector()) {
13969 ContainerVT = getContainerForFixedLengthVector(VT);
13970 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13971 }
13972
13973 SDValue Mask, VL;
13974 if (Op->getOpcode() == ISD::VP_ABS) {
13975 Mask = Op->getOperand(1);
13976 if (VT.isFixedLengthVector())
13977 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13978 Subtarget);
13979 VL = Op->getOperand(2);
13980 } else
13981 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13982
13984 if (Subtarget.hasStdExtZvabd()) {
13985 Result = DAG.getNode(RISCVISD::ABS_VL, DL, ContainerVT, X,
13986 DAG.getUNDEF(ContainerVT), Mask, VL);
13987 } else {
13988 SDValue SplatZero = DAG.getNode(
13989 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13990 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13991 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13992 DAG.getUNDEF(ContainerVT), Mask, VL);
13993 Result = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13994 DAG.getUNDEF(ContainerVT), Mask, VL);
13995 }
13996 if (VT.isFixedLengthVector())
13997 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13998 return Result;
13999}
14000
// Lower an operation on fixed-length vectors to the equivalent
// RISCVISD::*_VL node on scalable container types: every fixed-length vector
// operand is "cast" to its container, and the extra passthru/mask/VL operands
// that the VL opcode expects are appended before building the node. The
// scalable result is converted back to the original fixed-length type.
14001SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
14002 SelectionDAG &DAG) const {
14003 const auto &TSInfo =
14004 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
14005
// Map the generic opcode to its VL counterpart and query which trailing
// operands (passthru, mask) that opcode takes.
14006 unsigned NewOpc = getRISCVVLOp(Op);
14007 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
14008 bool HasMask = TSInfo.hasMaskOp(NewOpc);
14009
14010 MVT VT = Op.getSimpleValueType();
14011 MVT ContainerVT = getContainerForFixedLengthVector(VT);
14012
14013 // Create list of operands by converting existing ones to scalable types.
14015 for (const SDValue &V : Op->op_values()) {
14016 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
14017
14018 // Pass through non-vector operands.
14019 if (!V.getValueType().isVector()) {
14020 Ops.push_back(V);
14021 continue;
14022 }
14023
14024 // "cast" fixed length vector to a scalable vector.
14025 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
14026 "Only fixed length vectors are supported!");
// An operand's element type may differ from the result's (e.g. compares),
// so rebuild the container type around this operand's element type.
14027 MVT VContainerVT = ContainerVT.changeVectorElementType(
14028 V.getSimpleValueType().getVectorElementType());
14029 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
14030 }
14031
14032 SDLoc DL(Op);
14033 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
// Append the VL-node trailing operands in canonical order:
// passthru (undef), then mask, then VL.
14034 if (HasPassthruOp)
14035 Ops.push_back(DAG.getUNDEF(ContainerVT));
14036 if (HasMask)
14037 Ops.push_back(Mask);
14038 Ops.push_back(VL);
14039
14040 // StrictFP operations have two result values. Their lowered result should
14041 // have same result count.
14042 if (Op->isStrictFPOpcode()) {
14043 SDValue ScalableRes =
14044 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
14045 Op->getFlags());
14046 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
14047 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
14048 }
14049
14050 SDValue ScalableRes =
14051 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
14052 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
14053}
14054
14055// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
14056// * Operands of each node are assumed to be in the same order.
14057// * The EVL operand is promoted from i32 to i64 on RV64.
14058// * Fixed-length vectors are converted to their scalable-vector container
14059// types.
14060SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
14061 const auto &TSInfo =
14062 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
14063
14064 unsigned RISCVISDOpc = getRISCVVLOp(Op);
14065 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
14066
14067 SDLoc DL(Op);
14068 MVT VT = Op.getSimpleValueType();
14070
14071 MVT ContainerVT = VT;
14072 if (VT.isFixedLengthVector())
14073 ContainerVT = getContainerForFixedLengthVector(VT);
14074
// Walk the VP node's operands in order, inserting the extra operands the VL
// node needs (passthru, rounding mode) at the right positions.
14075 for (const auto &OpIdx : enumerate(Op->ops())) {
14076 SDValue V = OpIdx.value();
14077 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
14078 // Add dummy passthru value before the mask. Or if there isn't a mask,
14079 // before EVL.
14080 if (HasPassthruOp) {
14081 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
14082 if (MaskIdx) {
14083 if (*MaskIdx == OpIdx.index())
14084 Ops.push_back(DAG.getUNDEF(ContainerVT));
14085 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
14086 OpIdx.index()) {
14087 if (Op.getOpcode() == ISD::VP_MERGE) {
14088 // For VP_MERGE, copy the false operand instead of an undef value.
14089 Ops.push_back(Ops.back());
14090 } else {
14091 assert(Op.getOpcode() == ISD::VP_SELECT);
14092 // For VP_SELECT, add an undef value.
14093 Ops.push_back(DAG.getUNDEF(ContainerVT));
14094 }
14095 }
14096 }
14097 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
14098 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
14099 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
14101 Subtarget.getXLenVT()));
14102 // Pass through operands which aren't fixed-length vectors.
14103 if (!V.getValueType().isFixedLengthVector()) {
14104 Ops.push_back(V);
14105 continue;
14106 }
14107 // "cast" fixed length vector to a scalable vector.
// NOTE: this inner ContainerVT intentionally shadows the outer one — the
// operand's element type may differ from the result's.
14108 MVT OpVT = V.getSimpleValueType();
14109 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
14110 assert(useRVVForFixedLengthVectorVT(OpVT) &&
14111 "Only fixed length vectors are supported!");
14112 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
14113 }
14114
// Scalable result needs no conversion; fixed-length results are converted
// back from the container type.
14115 if (!VT.isFixedLengthVector())
14116 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
14117
14118 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
14119
14120 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
14121}
14122
14123SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
14124 SelectionDAG &DAG) const {
14125 SDLoc DL(Op);
14126 MVT VT = Op.getSimpleValueType();
14127
14128 SDValue Src = Op.getOperand(0);
14129 // NOTE: Mask is dropped.
14130 SDValue VL = Op.getOperand(2);
14131
14132 MVT ContainerVT = VT;
14133 if (VT.isFixedLengthVector()) {
14134 ContainerVT = getContainerForFixedLengthVector(VT);
14135 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
14136 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
14137 }
14138
14139 MVT XLenVT = Subtarget.getXLenVT();
14140 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
14141 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14142 DAG.getUNDEF(ContainerVT), Zero, VL);
14143
14144 SDValue SplatValue = DAG.getSignedConstant(
14145 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
14146 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14147 DAG.getUNDEF(ContainerVT), SplatValue, VL);
14148
14149 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
14150 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
14151 if (!VT.isFixedLengthVector())
14152 return Result;
14153 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14154}
14155
// Lower a VP setcc whose operands are mask (i1) vectors to RVV mask-register
// logic ops (vmxor/vmand/vmor). Each comparison of booleans reduces to a
// two-instruction (at most) mask expression; the per-case mappings are
// documented inline below. The VP mask operand is dropped — masked-off
// lanes are undef.
14156SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
14157 SelectionDAG &DAG) const {
14158 SDLoc DL(Op);
14159 MVT VT = Op.getSimpleValueType();
14160
14161 SDValue Op1 = Op.getOperand(0);
14162 SDValue Op2 = Op.getOperand(1);
14163 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
14164 // NOTE: Mask is dropped.
14165 SDValue VL = Op.getOperand(4);
14166
14167 MVT ContainerVT = VT;
14168 if (VT.isFixedLengthVector()) {
14169 ContainerVT = getContainerForFixedLengthVector(VT);
14170 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14171 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14172 }
14173
// All-ones mask used to express logical NOT as xor-with-ones.
14175 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14176
14177 switch (Condition) {
14178 default:
14179 break;
14180 // X != Y --> (X^Y)
14181 case ISD::SETNE:
14182 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
14183 break;
14184 // X == Y --> ~(X^Y)
14185 case ISD::SETEQ: {
14186 SDValue Temp =
14187 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
14188 Result =
14189 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
14190 break;
14191 }
14192 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
14193 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
14194 case ISD::SETGT:
14195 case ISD::SETULT: {
14196 SDValue Temp =
14197 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
14198 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
14199 break;
14200 }
14201 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
14202 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
14203 case ISD::SETLT:
14204 case ISD::SETUGT: {
14205 SDValue Temp =
14206 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
14207 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
14208 break;
14209 }
14210 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
14211 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
14212 case ISD::SETGE:
14213 case ISD::SETULE: {
14214 SDValue Temp =
14215 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
14216 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
14217 break;
14218 }
14219 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
14220 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
14221 case ISD::SETLE:
14222 case ISD::SETUGE: {
14223 SDValue Temp =
14224 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
14225 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
14226 break;
14227 }
14228 }
14229
14230 if (!VT.isFixedLengthVector())
14231 return Result;
14232 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14233}
14234
14235// Lower Floating-Point/Integer Type-Convert VP SDNodes
// Handles all four widths/direction combinations: the destination element
// may be the same size, wider, or narrower than the source, and either side
// may be integer or floating point. Multi-step conversions go through an
// interim type (at most one extra step is needed per the cases below).
14236SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
14237 SelectionDAG &DAG) const {
14238 SDLoc DL(Op);
14239
14240 SDValue Src = Op.getOperand(0);
14241 SDValue Mask = Op.getOperand(1);
14242 SDValue VL = Op.getOperand(2);
14243 unsigned RISCVISDOpc = getRISCVVLOp(Op);
14244
14245 MVT DstVT = Op.getSimpleValueType();
14246 MVT SrcVT = Src.getSimpleValueType();
14247 if (DstVT.isFixedLengthVector()) {
14248 DstVT = getContainerForFixedLengthVector(DstVT);
14249 SrcVT = getContainerForFixedLengthVector(SrcVT);
14250 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
14251 MVT MaskVT = getMaskTypeFor(DstVT);
14252 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14253 }
14254
14255 unsigned DstEltSize = DstVT.getScalarSizeInBits();
14256 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
14257
14259 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
14260 if (SrcVT.isInteger()) {
14261 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
14262
// Sign of the conversion dictates the flavor of pre-extension.
14263 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
14264 ? RISCVISD::VSEXT_VL
14265 : RISCVISD::VZEXT_VL;
14266
14267 // Do we need to do any pre-widening before converting?
14268 if (SrcEltSize == 1) {
// i1 sources can't be extended directly: select 0/1 (or 0/-1 for sext)
// into a full-width integer vector first.
14269 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
14270 MVT XLenVT = Subtarget.getXLenVT();
14271 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
14272 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
14273 DAG.getUNDEF(IntVT), Zero, VL);
14274 SDValue One = DAG.getSignedConstant(
14275 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
14276 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
14277 DAG.getUNDEF(IntVT), One, VL);
14278 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
14279 ZeroSplat, DAG.getUNDEF(IntVT), VL);
14280 } else if (DstEltSize > (2 * SrcEltSize)) {
14281 // Widen before converting.
14282 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
14283 DstVT.getVectorElementCount());
14284 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
14285 }
14286
14287 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
14288 } else {
14289 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
14290 "Wrong input/output vector types");
14291
14292 // Convert f16 to f32 then convert f32 to i64.
14293 if (DstEltSize > (2 * SrcEltSize)) {
14294 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
14295 MVT InterimFVT =
14296 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
14297 Src =
14298 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
14299 }
14300
14301 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
14302 }
14303 } else { // Narrowing + Conversion
14304 if (SrcVT.isInteger()) {
14305 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
14306 // First do a narrowing convert to an FP type half the size, then round
14307 // the FP type to a small FP type if needed.
14308
14309 MVT InterimFVT = DstVT;
14310 if (SrcEltSize > (2 * DstEltSize)) {
14311 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
14312 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
14313 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
14314 }
14315
14316 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
14317
14318 if (InterimFVT != DstVT) {
14319 Src = Result;
14320 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
14321 }
14322 } else {
14323 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
14324 "Wrong input/output vector types");
14325 // First do a narrowing conversion to an integer half the size, then
14326 // truncate if needed.
14327
14328 if (DstEltSize == 1) {
14329 // First convert to the same size integer, then convert to mask using
14330 // setcc.
14331 assert(SrcEltSize >= 16 && "Unexpected FP type!");
14332 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
14333 DstVT.getVectorElementCount());
14334 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14335
14336 // Compare the integer result to 0. The integer should be 0 or 1/-1,
14337 // otherwise the conversion was undefined.
14338 MVT XLenVT = Subtarget.getXLenVT();
14339 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
14340 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
14341 DAG.getUNDEF(InterimIVT), SplatZero, VL);
14342 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
14343 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
14344 DAG.getUNDEF(DstVT), Mask, VL});
14345 } else {
14346 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14347 DstVT.getVectorElementCount());
14348
14349 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14350
// Keep halving the integer width with vnsrl-style truncates until the
// destination element width is reached.
14351 while (InterimIVT != DstVT) {
14352 SrcEltSize /= 2;
14353 Src = Result;
14354 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14355 DstVT.getVectorElementCount());
14356 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
14357 Src, Mask, VL);
14358 }
14359 }
14360 }
14361 }
14362
14363 MVT VT = Op.getSimpleValueType();
14364 if (!VT.isFixedLengthVector())
14365 return Result;
14366 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14367}
14368
// Lower VP_MERGE on mask (i1) vectors. There is no mask-register vmerge, so
// the i1 operands are promoted to i8 vectors, merged with vmerge.vvm, and the
// result is converted back to a mask by comparing against zero.
14369SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
14370 SelectionDAG &DAG) const {
14371 SDLoc DL(Op);
14372 MVT VT = Op.getSimpleValueType();
14373 MVT XLenVT = Subtarget.getXLenVT();
14374
14375 SDValue Mask = Op.getOperand(0);
14376 SDValue TrueVal = Op.getOperand(1);
14377 SDValue FalseVal = Op.getOperand(2);
14378 SDValue VL = Op.getOperand(3);
14379
14380 // Use default legalization if a vector of EVL type would be legal.
14381 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
14383 if (isTypeLegal(EVLVecVT))
14384 return SDValue();
14385
14386 MVT ContainerVT = VT;
14387 if (VT.isFixedLengthVector()) {
14388 ContainerVT = getContainerForFixedLengthVector(VT);
14389 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
14390 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
14391 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
14392 }
14393
14394 // Promote to a vector of i8.
14395 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
14396
14397 // Promote TrueVal and FalseVal using VLMax.
14398 // FIXME: Is there a better way to do this?
14399 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
14400 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14401 DAG.getUNDEF(PromotedVT),
14402 DAG.getConstant(1, DL, XLenVT), VLMax);
14403 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14404 DAG.getUNDEF(PromotedVT),
14405 DAG.getConstant(0, DL, XLenVT), VLMax);
14406 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
14407 SplatZero, DAG.getUNDEF(PromotedVT), VL);
14408 // Any element past VL uses FalseVal, so use VLMax
14409 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
14410 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
14411
14412 // VP_MERGE the two promoted values.
// FalseVal also serves as the passthru, so lanes past VL keep FalseVal.
14413 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
14414 TrueVal, FalseVal, FalseVal, VL);
14415
14416 // Convert back to mask.
14417 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14418 SDValue Result = DAG.getNode(
14419 RISCVISD::SETCC_VL, DL, ContainerVT,
14420 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
14421 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
14422
14423 if (VT.isFixedLengthVector())
14424 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14425 return Result;
14426}
14427
// Lower experimental VP_SPLICE: concatenate the low EVL1 elements of Op1
// with Op2 shifted in, using a slidedown of Op1 followed by a slideup of
// Op2. Mask vectors are first widened to i8 and narrowed back at the end.
// A splice of a single scalar element (Offset == 0, EVL1 == 1) is matched
// to a cheaper vslide1up.
14428SDValue
14429RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
14430 SelectionDAG &DAG) const {
14431 using namespace SDPatternMatch;
14432
14433 SDLoc DL(Op);
14434
14435 SDValue Op1 = Op.getOperand(0);
14436 SDValue Op2 = Op.getOperand(1);
14437 SDValue Offset = Op.getOperand(2);
14438 SDValue Mask = Op.getOperand(3);
14439 SDValue EVL1 = Op.getOperand(4);
14440 SDValue EVL2 = Op.getOperand(5);
14441
14442 const MVT XLenVT = Subtarget.getXLenVT();
14443 MVT VT = Op.getSimpleValueType();
14444 MVT ContainerVT = VT;
14445 if (VT.isFixedLengthVector()) {
14446 ContainerVT = getContainerForFixedLengthVector(VT);
14447 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14448 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14449 MVT MaskVT = getMaskTypeFor(ContainerVT);
14450 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14451 }
14452
14453 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
14454 if (IsMaskVector) {
// Mask vectors can't be slid directly; promote each operand to 0/1 in i8.
14455 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
14456
14457 // Expand input operands
14458 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14459 DAG.getUNDEF(ContainerVT),
14460 DAG.getConstant(1, DL, XLenVT), EVL1);
14461 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14462 DAG.getUNDEF(ContainerVT),
14463 DAG.getConstant(0, DL, XLenVT), EVL1);
14464 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
14465 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
14466
14467 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14468 DAG.getUNDEF(ContainerVT),
14469 DAG.getConstant(1, DL, XLenVT), EVL2);
14470 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14471 DAG.getUNDEF(ContainerVT),
14472 DAG.getConstant(0, DL, XLenVT), EVL2);
14473 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
14474 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
14475 }
14476
// Peel the scalar being spliced in, if Op1 is a splat or an insert into
// element zero.
14477 auto getVectorFirstEle = [](SDValue Vec) {
14478 SDValue FirstEle;
14479 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
14480 return FirstEle;
14481
14482 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
14484 return Vec.getOperand(0);
14485
14486 return SDValue();
14487 };
14488
// Fast path: splicing a single element in front of Op2 is a vslide1up.
14489 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
14490 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
14491 MVT EltVT = ContainerVT.getVectorElementType();
// f16/bf16 without vector FP16 support go through the integer domain.
14493 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
14494 EltVT == MVT::bf16) {
14495 EltVT = EltVT.changeTypeToInteger();
14496 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
14497 Op2 = DAG.getBitcast(ContainerVT, Op2);
14498 FirstEle =
14499 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
14500 }
14501 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
14502 : RISCVISD::VSLIDE1UP_VL,
14503 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
14504 FirstEle, Mask, EVL2);
14505 Result = DAG.getBitcast(
14507 Result);
14508 return VT.isFixedLengthVector()
14509 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
14510 : Result;
14511 }
14512
// General case: split the splice into a slidedown of Op1 by Offset and a
// slideup of Op2 by EVL1 - Offset.
14513 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
14514 SDValue DownOffset, UpOffset;
14515 if (ImmValue >= 0) {
14516 // The operand is a TargetConstant, we need to rebuild it as a regular
14517 // constant.
14518 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
14519 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
14520 } else {
14521 // The operand is a TargetConstant, we need to rebuild it as a regular
14522 // constant rather than negating the original operand.
14523 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
14524 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
14525 }
14526
14527 if (ImmValue != 0)
14528 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14529 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
14530 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
14531 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
14532 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
14533
14534 if (IsMaskVector) {
14535 // Truncate Result back to a mask vector (Result has same EVL as Op2)
14536 Result = DAG.getNode(
14537 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
14538 {Result, DAG.getConstant(0, DL, ContainerVT),
14539 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
14540 Mask, EVL2});
14541 }
14542
14543 if (!VT.isFixedLengthVector())
14544 return Result;
14545 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14546}
14547
// Lower experimental VP_REVERSE: reverse the first EVL elements of the
// source with a vrgather through descending indices (EVL-1 - vid). Mask
// vectors are widened to i8 first. For SEW=8 with a potentially large VLMAX
// the indices may not fit in i8, so either the vector is split and reversed
// per half (LMUL=8) or the index type is promoted to i16 for
// vrgatherei16.vv.
14548SDValue
14549RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
14550 SelectionDAG &DAG) const {
14551 SDLoc DL(Op);
14552 MVT VT = Op.getSimpleValueType();
14553 MVT XLenVT = Subtarget.getXLenVT();
14554
14555 SDValue Op1 = Op.getOperand(0);
14556 SDValue Mask = Op.getOperand(1);
14557 SDValue EVL = Op.getOperand(2);
14558
14559 MVT ContainerVT = VT;
14560 if (VT.isFixedLengthVector()) {
14561 ContainerVT = getContainerForFixedLengthVector(VT);
14562 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14563 MVT MaskVT = getMaskTypeFor(ContainerVT);
14564 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14565 }
14566
14567 MVT GatherVT = ContainerVT;
14568 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
14569 // Check if we are working with mask vectors
14570 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
14571 if (IsMaskVector) {
14572 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
14573
14574 // Expand input operand
14575 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14576 DAG.getUNDEF(IndicesVT),
14577 DAG.getConstant(1, DL, XLenVT), EVL);
14578 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14579 DAG.getUNDEF(IndicesVT),
14580 DAG.getConstant(0, DL, XLenVT), EVL);
14581 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
14582 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
14583 }
14584
14585 unsigned EltSize = GatherVT.getScalarSizeInBits();
14586 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
14587 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
14588 unsigned MaxVLMAX =
14589 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
14590
14591 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
14592 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
14593 // to use vrgatherei16.vv.
14594 // TODO: It's also possible to use vrgatherei16.vv for other types to
14595 // decrease register width for the index calculation.
14596 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
14597 if (MaxVLMAX > 256 && EltSize == 8) {
14598 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
14599 // Split the vector in half and reverse each half using a full register
14600 // reverse.
14601 // Swap the halves and concatenate them.
14602 // Slide the concatenated result by (VLMax - VL).
14603 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
14604 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
14605 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
14606
14607 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
14608 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
14609
14610 // Reassemble the low and high pieces reversed.
14611 // NOTE: this Result is unmasked (because we do not need masks for
14612 // shuffles). If in the future this has to change, we can use a SELECT_VL
14613 // between Result and UNDEF using the mask originally passed to VP_REVERSE
14614 SDValue Result =
14615 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
14616
14617 // Slide off any elements from past EVL that were reversed into the low
14618 // elements.
14619 SDValue VLMax =
14620 DAG.getElementCount(DL, XLenVT, GatherVT.getVectorElementCount());
14621 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
14622
14623 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
14624 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
14625
14626 if (IsMaskVector) {
14627 // Truncate Result back to a mask vector
14628 Result =
14629 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
14630 {Result, DAG.getConstant(0, DL, GatherVT),
14632 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14633 }
14634
14635 if (!VT.isFixedLengthVector())
14636 return Result;
14637 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14638 }
14639
14640 // Just promote the int type to i16 which will double the LMUL.
14641 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
14642 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
14643 }
14644
// General path: indices = (EVL - 1) - vid, then gather.
14645 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
14646 SDValue VecLen =
14647 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
14648 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14649 DAG.getUNDEF(IndicesVT), VecLen, EVL);
14650 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
14651 DAG.getUNDEF(IndicesVT), Mask, EVL);
14652 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
14653 DAG.getUNDEF(GatherVT), Mask, EVL);
14654
14655 if (IsMaskVector) {
14656 // Truncate Result back to a mask vector
14657 Result = DAG.getNode(
14658 RISCVISD::SETCC_VL, DL, ContainerVT,
14659 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
14660 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14661 }
14662
14663 if (!VT.isFixedLengthVector())
14664 return Result;
14665 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14666}
14667
14668SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
14669 SelectionDAG &DAG) const {
14670 MVT VT = Op.getSimpleValueType();
14671 if (VT.getVectorElementType() != MVT::i1)
14672 return lowerVPOp(Op, DAG);
14673
14674 // It is safe to drop mask parameter as masked-off elements are undef.
14675 SDValue Op1 = Op->getOperand(0);
14676 SDValue Op2 = Op->getOperand(1);
14677 SDValue VL = Op->getOperand(3);
14678
14679 MVT ContainerVT = VT;
14680 const bool IsFixed = VT.isFixedLengthVector();
14681 if (IsFixed) {
14682 ContainerVT = getContainerForFixedLengthVector(VT);
14683 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14684 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14685 }
14686
14687 SDLoc DL(Op);
14688 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
14689 if (!IsFixed)
14690 return Val;
14691 return convertFromScalableVector(VT, Val, DAG, Subtarget);
14692}
14693
// Lower VP_STRIDED_LOAD to a riscv_vlse / riscv_vlse_mask memory intrinsic.
// An all-ones mask selects the cheaper unmasked form; fixed-length results
// and masks are routed through scalable container types.
14694SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
14695 SelectionDAG &DAG) const {
14696 SDLoc DL(Op);
14697 MVT XLenVT = Subtarget.getXLenVT();
14698 MVT VT = Op.getSimpleValueType();
14699 MVT ContainerVT = VT;
14700 if (VT.isFixedLengthVector())
14701 ContainerVT = getContainerForFixedLengthVector(VT);
14702
14703 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14704
14705 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
14706 // Check if the mask is known to be all ones
14707 SDValue Mask = VPNode->getMask();
14708 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14709
14710 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
14711 : Intrinsic::riscv_vlse_mask,
14712 DL, XLenVT);
// Intrinsic operand order: chain, id, passthru (undef), base pointer,
// stride, [mask], VL, [policy].
14713 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
14714 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
14715 VPNode->getStride()};
14716 if (!IsUnmasked) {
14717 if (VT.isFixedLengthVector()) {
14718 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14719 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14720 }
14721 Ops.push_back(Mask);
14722 }
14723 Ops.push_back(VPNode->getVectorLength());
14724 if (!IsUnmasked) {
// The masked form additionally takes a tail/mask policy operand.
14725 SDValue Policy =
14727 Ops.push_back(Policy);
14728 }
14729
14730 SDValue Result =
14732 VPNode->getMemoryVT(), VPNode->getMemOperand());
14733 SDValue Chain = Result.getValue(1);
14734
14735 if (VT.isFixedLengthVector())
14736 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14737
14738 return DAG.getMergeValues({Result, Chain}, DL);
14739}
14740
14741SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
14742 SelectionDAG &DAG) const {
14743 SDLoc DL(Op);
14744 MVT XLenVT = Subtarget.getXLenVT();
14745
14746 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
14747 SDValue StoreVal = VPNode->getValue();
14748 MVT VT = StoreVal.getSimpleValueType();
14749 MVT ContainerVT = VT;
14750 if (VT.isFixedLengthVector()) {
14751 ContainerVT = getContainerForFixedLengthVector(VT);
14752 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
14753 }
14754
14755 // Check if the mask is known to be all ones
14756 SDValue Mask = VPNode->getMask();
14757 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14758
14759 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14760 : Intrinsic::riscv_vsse_mask,
14761 DL, XLenVT);
14762 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14763 VPNode->getBasePtr(), VPNode->getStride()};
14764 if (!IsUnmasked) {
14765 if (VT.isFixedLengthVector()) {
14766 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14767 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14768 }
14769 Ops.push_back(Mask);
14770 }
14771 Ops.push_back(VPNode->getVectorLength());
14772
14773 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14774 Ops, VPNode->getMemoryVT(),
14775 VPNode->getMemOperand());
14776}
14777
14778// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14779// matched to a RVV indexed load. The RVV indexed load instructions only
14780// support the "unsigned unscaled" addressing mode; indices are implicitly
14781// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14782// signed or scaled indexing is extended to the XLEN value type and scaled
14783// accordingly.
14784SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14785 SelectionDAG &DAG) const {
14786 SDLoc DL(Op);
14787 MVT VT = Op.getSimpleValueType();
14788
14789 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14790 EVT MemVT = MemSD->getMemoryVT();
14791 MachineMemOperand *MMO = MemSD->getMemOperand();
14792 SDValue Chain = MemSD->getChain();
14793 SDValue BasePtr = MemSD->getBasePtr();
14794
14795 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14796 SDValue Index, Mask, PassThru, VL;
14797
// Pull the operands out of whichever node flavor this is; only VP_GATHER
// carries an explicit VL.
14798 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14799 Index = VPGN->getIndex();
14800 Mask = VPGN->getMask();
14801 PassThru = DAG.getUNDEF(VT);
14802 VL = VPGN->getVectorLength();
14803 // VP doesn't support extending loads.
14805 } else {
14806 // Else it must be a MGATHER.
14807 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14808 Index = MGN->getIndex();
14809 Mask = MGN->getMask();
14810 PassThru = MGN->getPassThru();
14811 LoadExtType = MGN->getExtensionType();
14812 }
14813
14814 MVT IndexVT = Index.getSimpleValueType();
14815 MVT XLenVT = Subtarget.getXLenVT();
14816
14818 "Unexpected VTs!");
14819 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14820 // Targets have to explicitly opt-in for extending vector loads.
14821 assert(LoadExtType == ISD::NON_EXTLOAD &&
14822 "Unexpected extending MGATHER/VP_GATHER");
14823
14824 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14825 // the selection of the masked intrinsics doesn't do this for us.
14826 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14827
14828 MVT ContainerVT = VT;
14829 if (VT.isFixedLengthVector()) {
14830 ContainerVT = getContainerForFixedLengthVector(VT);
// Keep the index element type but adopt the container's element count.
14831 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14832 ContainerVT.getVectorElementCount());
14833
14834 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14835
14836 if (!IsUnmasked) {
14837 MVT MaskVT = getMaskTypeFor(ContainerVT);
14838 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14839 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14840 }
14841 }
14842
// MGATHER has no VL; use the default VL for the container type.
14843 if (!VL)
14844 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14845
// Indices wider than XLEN are truncated (they are treated as byte offsets
// zero-extended/truncated to XLEN anyway, per the header comment).
14846 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14847 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14848 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14849 }
14850
14851 unsigned IntID =
14852 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
// Intrinsic operand order: chain, id, passthru, base, index, [mask], VL,
// [policy].
14853 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14854 if (IsUnmasked)
14855 Ops.push_back(DAG.getUNDEF(ContainerVT));
14856 else
14857 Ops.push_back(PassThru);
14858 Ops.push_back(BasePtr);
14859 Ops.push_back(Index);
14860 if (!IsUnmasked)
14861 Ops.push_back(Mask);
14862 Ops.push_back(VL);
14863 if (!IsUnmasked)
14864 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14865
14866 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14867 SDValue Result =
14868 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14869 Chain = Result.getValue(1);
14870
14871 if (VT.isFixedLengthVector())
14872 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14873
14874 return DAG.getMergeValues({Result, Chain}, DL);
14875}
14876
14877// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14878// matched to a RVV indexed store. The RVV indexed store instructions only
14879// support the "unsigned unscaled" addressing mode; indices are implicitly
14880// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14881// signed or scaled indexing is extended to the XLEN value type and scaled
14882// accordingly.
14883SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14884 SelectionDAG &DAG) const {
14885 SDLoc DL(Op);
14886 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14887 EVT MemVT = MemSD->getMemoryVT();
14888 MachineMemOperand *MMO = MemSD->getMemOperand();
14889 SDValue Chain = MemSD->getChain();
14890 SDValue BasePtr = MemSD->getBasePtr();
14891
14892 [[maybe_unused]] bool IsTruncatingStore = false;
14893 SDValue Index, Mask, Val, VL;
14894
14895 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14896 Index = VPSN->getIndex();
14897 Mask = VPSN->getMask();
14898 Val = VPSN->getValue();
14899 VL = VPSN->getVectorLength();
14900 // VP doesn't support truncating stores.
14901 IsTruncatingStore = false;
14902 } else {
14903 // Else it must be a MSCATTER.
14904 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14905 Index = MSN->getIndex();
14906 Mask = MSN->getMask();
14907 Val = MSN->getValue();
14908 IsTruncatingStore = MSN->isTruncatingStore();
14909 }
14910
14911 MVT VT = Val.getSimpleValueType();
14912 MVT IndexVT = Index.getSimpleValueType();
14913 MVT XLenVT = Subtarget.getXLenVT();
14914
14916 "Unexpected VTs!");
14917 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14918 // Targets have to explicitly opt-in for extending vector loads and
14919 // truncating vector stores.
14920 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14921
14922 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14923 // the selection of the masked intrinsics doesn't do this for us.
14924 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14925
14926 MVT ContainerVT = VT;
14927 if (VT.isFixedLengthVector()) {
14928 ContainerVT = getContainerForFixedLengthVector(VT);
14929 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14930 ContainerVT.getVectorElementCount());
14931
14932 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14933 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14934
14935 if (!IsUnmasked) {
14936 MVT MaskVT = getMaskTypeFor(ContainerVT);
14937 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14938 }
14939 }
14940
14941 if (!VL)
14942 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14943
14944 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14945 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14946 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14947 }
14948
14949 unsigned IntID =
14950 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14951 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14952 Ops.push_back(Val);
14953 Ops.push_back(BasePtr);
14954 Ops.push_back(Index);
14955 if (!IsUnmasked)
14956 Ops.push_back(Mask);
14957 Ops.push_back(VL);
14958
14960 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14961}
14962
14963SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14964 SelectionDAG &DAG) const {
14965 const MVT XLenVT = Subtarget.getXLenVT();
14966 SDLoc DL(Op);
14967 SDValue Chain = Op->getOperand(0);
14968 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14969 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14970 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14971
14972 // Encoding used for rounding mode in RISC-V differs from that used in
14973 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
14974 // table, which consists of a sequence of 4-bit fields, each representing
14975 // corresponding FLT_ROUNDS mode.
14976 static const int Table =
14982
14983 SDValue Shift =
14984 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14985 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14986 DAG.getConstant(Table, DL, XLenVT), Shift);
14987 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14988 DAG.getConstant(7, DL, XLenVT));
14989
14990 return DAG.getMergeValues({Masked, Chain}, DL);
14991}
14992
14993SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14994 SelectionDAG &DAG) const {
14995 const MVT XLenVT = Subtarget.getXLenVT();
14996 SDLoc DL(Op);
14997 SDValue Chain = Op->getOperand(0);
14998 SDValue RMValue = Op->getOperand(1);
14999 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
15000
15001 // Encoding used for rounding mode in RISC-V differs from that used in
15002 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
15003 // a table, which consists of a sequence of 4-bit fields, each representing
15004 // corresponding RISC-V mode.
15005 static const unsigned Table =
15011
15012 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
15013
15014 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
15015 DAG.getConstant(2, DL, XLenVT));
15016 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
15017 DAG.getConstant(Table, DL, XLenVT), Shift);
15018 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
15019 DAG.getConstant(0x7, DL, XLenVT));
15020 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15021 RMValue);
15022}
15023
15024SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
15025 SelectionDAG &DAG) const {
15026 const MVT XLenVT = Subtarget.getXLenVT();
15027 SDLoc DL(Op);
15028 SDValue Chain = Op->getOperand(0);
15029 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15030 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
15031 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
15032}
15033
15034SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
15035 SelectionDAG &DAG) const {
15036 const MVT XLenVT = Subtarget.getXLenVT();
15037 SDLoc DL(Op);
15038 SDValue Chain = Op->getOperand(0);
15039 SDValue EnvValue = Op->getOperand(1);
15040 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15041
15042 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
15043 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15044 EnvValue);
15045}
15046
15047SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
15048 SelectionDAG &DAG) const {
15049 const MVT XLenVT = Subtarget.getXLenVT();
15050 SDLoc DL(Op);
15051 SDValue Chain = Op->getOperand(0);
15052 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
15053 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15054
15055 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
15056 EnvValue);
15057}
15058
15061
15062SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
15063 SelectionDAG &DAG) const {
15064 const MVT XLenVT = Subtarget.getXLenVT();
15065 SDLoc DL(Op);
15066 SDValue Chain = Op->getOperand(0);
15067 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15068 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
15069 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
15070 Chain = Result.getValue(1);
15071 return DAG.getMergeValues({Result, Chain}, DL);
15072}
15073
15074SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
15075 SelectionDAG &DAG) const {
15076 const MVT XLenVT = Subtarget.getXLenVT();
15077 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
15078 SDLoc DL(Op);
15079 SDValue Chain = Op->getOperand(0);
15080 SDValue EnvValue = Op->getOperand(1);
15081 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15082 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
15083
15084 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
15085 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
15086 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
15087 ModeMask);
15088 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
15089 EnvValue);
15090}
15091
15092SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
15093 SelectionDAG &DAG) const {
15094 const MVT XLenVT = Subtarget.getXLenVT();
15095 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
15096 SDLoc DL(Op);
15097 SDValue Chain = Op->getOperand(0);
15098 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
15099 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
15100
15101 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
15102 ModeMask);
15103}
15104
15105SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
15106 SelectionDAG &DAG) const {
15107 MachineFunction &MF = DAG.getMachineFunction();
15108
15109 bool isRISCV64 = Subtarget.is64Bit();
15110 EVT PtrVT = getPointerTy(DAG.getDataLayout());
15111
15112 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
15113 return DAG.getFrameIndex(FI, PtrVT);
15114}
15115
15116// Returns the opcode of the target-specific SDNode that implements the 32-bit
15117// form of the given Opcode.
15118static unsigned getRISCVWOpcode(unsigned Opcode) {
15119 switch (Opcode) {
15120 default:
15121 llvm_unreachable("Unexpected opcode");
15122 case ISD::SHL:
15123 return RISCVISD::SLLW;
15124 case ISD::SRA:
15125 return RISCVISD::SRAW;
15126 case ISD::SRL:
15127 return RISCVISD::SRLW;
15128 case ISD::SDIV:
15129 return RISCVISD::DIVW;
15130 case ISD::UDIV:
15131 return RISCVISD::DIVUW;
15132 case ISD::UREM:
15133 return RISCVISD::REMUW;
15134 case ISD::ROTL:
15135 return RISCVISD::ROLW;
15136 case ISD::ROTR:
15137 return RISCVISD::RORW;
15138 }
15139}
15140
15141// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
15142// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
15143// otherwise be promoted to i64, making it difficult to select the
15144// SLLW/DIVUW/.../*W later one because the fact the operation was originally of
15145// type i8/i16/i32 is lost.
15147 unsigned ExtOpc = ISD::ANY_EXTEND) {
15148 SDLoc DL(N);
15149 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
15150 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
15151 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
15152 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
15153 // ReplaceNodeResults requires we maintain the same type for the return value.
15154 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
15155}
15156
15157// Converts the given 32-bit operation to a i64 operation with signed extension
15158// semantic to reduce the signed extension instructions.
15160 SDLoc DL(N);
15161 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15162 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15163 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
15164 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15165 DAG.getValueType(MVT::i32));
15166 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
15167}
15168
15171 SelectionDAG &DAG) const {
15172 SDLoc DL(N);
15173 switch (N->getOpcode()) {
15174 default:
15175 llvm_unreachable("Don't know how to custom type legalize this operation!");
15178 case ISD::FP_TO_SINT:
15179 case ISD::FP_TO_UINT: {
15180 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15181 "Unexpected custom legalisation");
15182 bool IsStrict = N->isStrictFPOpcode();
15183 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
15184 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
15185 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
15186 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
15188 if (!isTypeLegal(Op0.getValueType()))
15189 return;
15190 if (IsStrict) {
15191 SDValue Chain = N->getOperand(0);
15192 // In absence of Zfh, promote f16 to f32, then convert.
15193 if (Op0.getValueType() == MVT::f16 &&
15194 !Subtarget.hasStdExtZfhOrZhinx()) {
15195 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
15196 {Chain, Op0});
15197 Chain = Op0.getValue(1);
15198 }
15199 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
15200 : RISCVISD::STRICT_FCVT_WU_RV64;
15201 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
15202 SDValue Res = DAG.getNode(
15203 Opc, DL, VTs, Chain, Op0,
15204 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
15205 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15206 Results.push_back(Res.getValue(1));
15207 return;
15208 }
15209 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
15210 // convert.
15211 if ((Op0.getValueType() == MVT::f16 &&
15212 !Subtarget.hasStdExtZfhOrZhinx()) ||
15213 Op0.getValueType() == MVT::bf16)
15214 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
15215
15216 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15217 SDValue Res =
15218 DAG.getNode(Opc, DL, MVT::i64, Op0,
15219 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
15220 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15221 return;
15222 }
15223 // If the FP type needs to be softened, emit a library call using the 'si'
15224 // version. If we left it to default legalization we'd end up with 'di'. If
15225 // the FP type doesn't need to be softened just let generic type
15226 // legalization promote the result type.
15227 RTLIB::Libcall LC;
15228 if (IsSigned)
15229 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
15230 else
15231 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
15232 MakeLibCallOptions CallOptions;
15233 EVT OpVT = Op0.getValueType();
15234 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
15235 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
15236 SDValue Result;
15237 std::tie(Result, Chain) =
15238 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
15239 Results.push_back(Result);
15240 if (IsStrict)
15241 Results.push_back(Chain);
15242 break;
15243 }
15244 case ISD::LROUND: {
15245 SDValue Op0 = N->getOperand(0);
15246 EVT Op0VT = Op0.getValueType();
15247 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
15249 if (!isTypeLegal(Op0VT))
15250 return;
15251
15252 // In absence of Zfh, promote f16 to f32, then convert.
15253 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
15254 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
15255
15256 SDValue Res =
15257 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
15258 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
15259 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15260 return;
15261 }
15262 // If the FP type needs to be softened, emit a library call to lround. We'll
15263 // need to truncate the result. We assume any value that doesn't fit in i32
15264 // is allowed to return an unspecified value.
15265 RTLIB::Libcall LC =
15266 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
15267 MakeLibCallOptions CallOptions;
15268 EVT OpVT = Op0.getValueType();
15269 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
15270 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
15271 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
15272 Results.push_back(Result);
15273 break;
15274 }
15277 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
15278 "has custom type legalization on riscv32");
15279
15280 SDValue LoCounter, HiCounter;
15281 MVT XLenVT = Subtarget.getXLenVT();
15282 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
15283 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
15284 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
15285 } else {
15286 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
15287 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
15288 }
15289 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
15290 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
15291 N->getOperand(0), LoCounter, HiCounter);
15292
15293 Results.push_back(
15294 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
15295 Results.push_back(RCW.getValue(2));
15296 break;
15297 }
15298 case ISD::LOAD: {
15299 if (!ISD::isNON_EXTLoad(N))
15300 return;
15301
15302 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
15303 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
15305
15306 if (N->getValueType(0) == MVT::i64) {
15307 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
15308 "Unexpected custom legalisation");
15309
15310 if (Ld->getAlign() < Subtarget.getZilsdAlign())
15311 return;
15312
15313 SDLoc DL(N);
15314 SDValue Result = DAG.getMemIntrinsicNode(
15315 RISCVISD::LD_RV32, DL,
15316 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
15317 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
15318 SDValue Lo = Result.getValue(0);
15319 SDValue Hi = Result.getValue(1);
15320 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15321 Results.append({Pair, Result.getValue(2)});
15322 return;
15323 }
15324
15325 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15326 "Unexpected custom legalisation");
15327
15328 SDLoc dl(N);
15329 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
15330 Ld->getBasePtr(), Ld->getMemoryVT(),
15331 Ld->getMemOperand());
15332 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
15333 Results.push_back(Res.getValue(1));
15334 return;
15335 }
15336 case ISD::MUL: {
15337 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
15338 unsigned XLen = Subtarget.getXLen();
15339 if (Size > XLen) {
15340 // This multiply needs to be expanded, try to use MULH+MUL or WMUL if
15341 // possible. We duplicate the default legalization to
15342 // MULHU/MULHS/UMUL_LOHI/SMUL_LOHI to minimize the number of calls to
15343 // MaskedValueIsZero and ComputeNumSignBits
15344 // FIXME: Should we have a target independent MULHSU/WMULSU node? Are
15345 // there are other targets that could use it?
15346 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
15347
15348 auto MakeMULPair = [&](SDValue L, SDValue R, unsigned HighOpc,
15349 unsigned LoHiOpc) {
15350 MVT XLenVT = Subtarget.getXLenVT();
15351 L = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, L);
15352 R = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, R);
15353 SDValue Lo, Hi;
15354 if (Subtarget.hasStdExtP() && !Subtarget.is64Bit()) {
15355 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
15356 Lo = DAG.getNode(LoHiOpc, DL, VTs, L, R);
15357 Hi = Lo.getValue(1);
15358 } else {
15359 Lo = DAG.getNode(ISD::MUL, DL, XLenVT, L, R);
15360 Hi = DAG.getNode(HighOpc, DL, XLenVT, L, R);
15361 }
15362 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
15363 };
15364
15365 SDValue LHS = N->getOperand(0);
15366 SDValue RHS = N->getOperand(1);
15367
15368 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
15369 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
15370 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
15371 if (LHSIsU && RHSIsU) {
15372 Results.push_back(MakeMULPair(LHS, RHS, ISD::MULHU, ISD::UMUL_LOHI));
15373 return;
15374 }
15375
15376 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
15377 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
15378 if (LHSIsS && RHSIsS)
15379 Results.push_back(MakeMULPair(LHS, RHS, ISD::MULHS, ISD::SMUL_LOHI));
15380 else if (RHSIsU && LHSIsS)
15381 Results.push_back(
15382 MakeMULPair(LHS, RHS, RISCVISD::MULHSU, RISCVISD::WMULSU));
15383 else if (LHSIsU && RHSIsS)
15384 Results.push_back(
15385 MakeMULPair(RHS, LHS, RISCVISD::MULHSU, RISCVISD::WMULSU));
15386
15387 return;
15388 }
15389 [[fallthrough]];
15390 }
15391 case ISD::ADD:
15392 case ISD::SUB:
15393 if (N->getValueType(0) == MVT::i64) {
15394 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
15395 "Unexpected custom legalisation");
15396
15397 // Expand to ADDD/SUBD.
15398 auto [LHSLo, LHSHi] =
15399 DAG.SplitScalar(N->getOperand(0), DL, MVT::i32, MVT::i32);
15400 auto [RHSLo, RHSHi] =
15401 DAG.SplitScalar(N->getOperand(1), DL, MVT::i32, MVT::i32);
15402 unsigned Opc =
15403 N->getOpcode() == ISD::ADD ? RISCVISD::ADDD : RISCVISD::SUBD;
15404 SDValue Res = DAG.getNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::i32),
15405 LHSLo, LHSHi, RHSLo, RHSHi);
15406 Res = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Res, Res.getValue(1));
15407 Results.push_back(Res);
15408 return;
15409 }
15410
15411 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15412 "Unexpected custom legalisation");
15413 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
15414 break;
15415 case ISD::SHL:
15416 case ISD::SRA:
15417 case ISD::SRL: {
15418 EVT VT = N->getValueType(0);
15419 if (VT.isFixedLengthVector() && Subtarget.hasStdExtP()) {
15420 assert(Subtarget.is64Bit() && (VT == MVT::v2i16 || VT == MVT::v4i8) &&
15421 "Unexpected vector type for P-extension shift");
15422
15423 // If shift amount is a splat, don't scalarize - let normal widening
15424 // and SIMD patterns handle it (pslli.h, psrli.h, etc.)
15425 SDValue ShiftAmt = N->getOperand(1);
15426 if (DAG.isSplatValue(ShiftAmt, /*AllowUndefs=*/true))
15427 break;
15428
15429 EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT);
15430 unsigned WidenNumElts = WidenVT.getVectorNumElements();
15431 // Unroll with OrigNumElts operations, padding result to WidenNumElts
15432 SDValue Res = DAG.UnrollVectorOp(N, WidenNumElts);
15433 Results.push_back(Res);
15434 break;
15435 }
15436
15437 if (VT == MVT::i64) {
15438 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
15439 "Unexpected custom legalisation");
15440
15441 SDValue LHS = N->getOperand(0);
15442 SDValue ShAmt = N->getOperand(1);
15443
15444 unsigned WideOpc = 0;
15445 APInt HighMask = APInt::getHighBitsSet(64, 32);
15446 if (DAG.MaskedValueIsZero(LHS, HighMask))
15447 WideOpc = RISCVISD::WSLL;
15448 else if (DAG.ComputeMaxSignificantBits(LHS) <= 32)
15449 WideOpc = RISCVISD::WSLA;
15450
15451 if (WideOpc) {
15452 SDValue Res =
15453 DAG.getNode(WideOpc, DL, DAG.getVTList(MVT::i32, MVT::i32),
15454 DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LHS),
15455 DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ShAmt));
15456 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0),
15457 Res, Res.getValue(1)));
15458 return;
15459 }
15460
15461 // Only handle constant shifts < 32. Non-constant shifts are handled by
15462 // lowerShiftLeftParts/lowerShiftRightParts, and shifts >= 32 use default
15463 // legalization.
15464 auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15465 if (!ShAmtC || ShAmtC->getZExtValue() >= 32)
15466 break;
15467
15468 auto [Lo, Hi] = DAG.SplitScalar(LHS, DL, MVT::i32, MVT::i32);
15469
15470 SDValue LoRes, HiRes;
15471 if (N->getOpcode() == ISD::SHL) {
15472 // Lo = slli Lo, shamt
15473 // Hi = nsrli {Hi, Lo}, (32 - shamt)
15474 uint64_t ShAmtVal = ShAmtC->getZExtValue();
15475 LoRes = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, ShAmt);
15476 HiRes = DAG.getNode(RISCVISD::NSRL, DL, MVT::i32, Lo, Hi,
15477 DAG.getConstant(32 - ShAmtVal, DL, MVT::i32));
15478 } else {
15479 bool IsSRA = N->getOpcode() == ISD::SRA;
15480 LoRes = DAG.getNode(IsSRA ? RISCVISD::NSRA : RISCVISD::NSRL, DL,
15481 MVT::i32, Lo, Hi, ShAmt);
15482 HiRes =
15483 DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32, Hi, ShAmt);
15484 }
15485 SDValue Res = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoRes, HiRes);
15486 Results.push_back(Res);
15487 return;
15488 }
15489
15490 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
15491 "Unexpected custom legalisation");
15492 if (N->getOperand(1).getOpcode() != ISD::Constant) {
15493 // If we can use a BSET instruction, allow default promotion to apply.
15494 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
15495 isOneConstant(N->getOperand(0)))
15496 break;
15497 Results.push_back(customLegalizeToWOp(N, DAG));
15498 break;
15499 }
15500
15501 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
15502 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
15503 // shift amount.
15504 if (N->getOpcode() == ISD::SHL) {
15505 SDLoc DL(N);
15506 SDValue NewOp0 =
15507 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15508 SDValue NewOp1 =
15509 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
15510 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
15511 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15512 DAG.getValueType(MVT::i32));
15513 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15514 }
15515
15516 break;
15517 }
15518 case ISD::ROTL:
15519 case ISD::ROTR:
15520 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15521 "Unexpected custom legalisation");
15522 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
15523 Subtarget.hasVendorXTHeadBb()) &&
15524 "Unexpected custom legalization");
15525 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
15526 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
15527 return;
15528 Results.push_back(customLegalizeToWOp(N, DAG));
15529 break;
15530 case ISD::CTTZ:
15532 case ISD::CTLZ:
15534 case ISD::CTLS: {
15535 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15536 "Unexpected custom legalisation");
15537
15538 SDValue NewOp0 =
15539 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15540 unsigned Opc;
15541 switch (N->getOpcode()) {
15542 default: llvm_unreachable("Unexpected opcode");
15543 case ISD::CTTZ:
15545 Opc = RISCVISD::CTZW;
15546 break;
15547 case ISD::CTLZ:
15549 Opc = RISCVISD::CLZW;
15550 break;
15551 case ISD::CTLS:
15552 Opc = RISCVISD::CLSW;
15553 break;
15554 }
15555
15556 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
15557 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15558 return;
15559 }
15560 case ISD::SDIV:
15561 case ISD::UDIV:
15562 case ISD::UREM: {
15563 MVT VT = N->getSimpleValueType(0);
15564 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
15565 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
15566 "Unexpected custom legalisation");
15567 // Don't promote division/remainder by constant since we should expand those
15568 // to multiply by magic constant.
15569 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
15570 if (N->getOperand(1).getOpcode() == ISD::Constant &&
15571 !isIntDivCheap(N->getValueType(0), Attr))
15572 return;
15573
15574 // If the input is i32, use ANY_EXTEND since the W instructions don't read
15575 // the upper 32 bits. For other types we need to sign or zero extend
15576 // based on the opcode.
15577 unsigned ExtOpc = ISD::ANY_EXTEND;
15578 if (VT != MVT::i32)
15579 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
15581
15582 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
15583 break;
15584 }
15585 case ISD::SADDO:
15586 case ISD::SSUBO: {
15587 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15588 "Unexpected custom legalisation");
15589
15590 // This is similar to the default legalization, but we return the
15591 // sext_inreg instead of the add/sub.
15592 bool IsAdd = N->getOpcode() == ISD::SADDO;
15593 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15594 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
15595 SDValue Op =
15596 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15597 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Op,
15598 DAG.getValueType(MVT::i32));
15599
15600 SDValue Overflow;
15601
15602 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
15603 // use the default legalization.
15604 if (IsAdd && isa<ConstantSDNode>(N->getOperand(1))) {
15605 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
15606
15607 // For an addition, the result should be less than one of the operands
15608 // (LHS) if and only if the other operand (RHS) is negative, otherwise
15609 // there will be overflow.
15610 EVT OType = N->getValueType(1);
15611 SDValue ResultLowerThanLHS =
15612 DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
15613 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
15614
15615 Overflow =
15616 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
15617 } else {
15618 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, Op, ISD::SETNE);
15619 }
15620
15621 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15622 Results.push_back(Overflow);
15623 return;
15624 }
15625 case ISD::UADDO:
15626 case ISD::USUBO: {
15627 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15628 "Unexpected custom legalisation");
15629 bool IsAdd = N->getOpcode() == ISD::UADDO;
15630 // Create an ADDW or SUBW.
15631 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15632 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15633 SDValue Res =
15634 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15635 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
15636 DAG.getValueType(MVT::i32));
15637
15638 SDValue Overflow;
15639 if (IsAdd && isOneConstant(RHS)) {
15640 // Special case uaddo X, 1 overflowed if the addition result is 0.
15641 // The general case (X + C) < C is not necessarily beneficial. Although we
15642 // reduce the live range of X, we may introduce the materialization of
15643 // constant C, especially when the setcc result is used by branch. We have
15644 // no compare with constant and branch instructions.
15645 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
15646 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
15647 } else if (IsAdd && isAllOnesConstant(RHS)) {
15648 // Special case uaddo X, -1 overflowed if X != 0.
15649 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
15650 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
15651 } else {
15652 // Sign extend the LHS and perform an unsigned compare with the ADDW
15653 // result. Since the inputs are sign extended from i32, this is equivalent
15654 // to comparing the lower 32 bits.
15655 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15656 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
15657 IsAdd ? ISD::SETULT : ISD::SETUGT);
15658 }
15659
15660 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15661 Results.push_back(Overflow);
15662 return;
15663 }
15664 case ISD::UADDSAT:
15665 case ISD::USUBSAT:
15666 case ISD::SADDSAT:
15667 case ISD::SSUBSAT: {
15668 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15669 "Unexpected custom legalisation");
15670
15671 if (Subtarget.hasStdExtP()) {
15672 // On RV64, map scalar i32 saturating add/sub through lane 0 of a packed
15673 // v2i32 operation so we can select ps*.w instructions.
15674 SDValue LHS = DAG.getNode(
15675 ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
15676 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)));
15677 SDValue RHS = DAG.getNode(
15678 ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
15679 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)));
15680 SDValue VecRes = DAG.getNode(N->getOpcode(), DL, MVT::v2i32, LHS, RHS);
15681 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
15682 Results.push_back(
15683 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, VecRes, Zero));
15684 return;
15685 }
15686
15687 assert(!Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
15688 Results.push_back(expandAddSubSat(N, DAG));
15689 return;
15690 }
15691 case ISD::ABS: {
15692 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15693 "Unexpected custom legalisation");
15694
15695 if (Subtarget.hasStdExtP()) {
15696 SDValue Src =
15697 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15698 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
15699 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15700 return;
15701 }
15702
15703 if (Subtarget.hasStdExtZbb()) {
15704 // Emit a special node that will be expanded to NEGW+MAX at isel.
15705 // This allows us to remember that the result is sign extended. Expanding
15706 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
15707 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
15708 N->getOperand(0));
15709 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
15710 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15711 return;
15712 }
15713
15714 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
15715 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15716
    // Freeze the source so we can increase its use count.
15718 Src = DAG.getFreeze(Src);
15719
15720 // Copy sign bit to all bits using the sraiw pattern.
15721 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
15722 DAG.getValueType(MVT::i32));
15723 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
15724 DAG.getConstant(31, DL, MVT::i64));
15725
15726 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
15727 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
15728
15729 // NOTE: The result is only required to be anyextended, but sext is
15730 // consistent with type legalization of sub.
15731 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
15732 DAG.getValueType(MVT::i32));
15733 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15734 return;
15735 }
15736 case ISD::BITCAST: {
15737 EVT VT = N->getValueType(0);
15738 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
15739 SDValue Op0 = N->getOperand(0);
15740 EVT Op0VT = Op0.getValueType();
15741 MVT XLenVT = Subtarget.getXLenVT();
15742 if (VT == MVT::i16 &&
15743 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
15744 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
15745 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
15746 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
15747 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
15748 Subtarget.hasStdExtFOrZfinx()) {
15749 SDValue FPConv =
15750 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
15751 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
15752 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
15753 Subtarget.hasStdExtDOrZdinx()) {
15754 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
15755 DAG.getVTList(MVT::i32, MVT::i32), Op0);
15756 SDValue Lo = NewReg.getValue(0);
15757 SDValue Hi = NewReg.getValue(1);
15758 // For big-endian, swap the order when building the i64 pair.
15759 if (!Subtarget.isLittleEndian())
15760 std::swap(Lo, Hi);
15761 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15762 Results.push_back(RetReg);
15763 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
15764 isTypeLegal(Op0VT)) {
15765 // Custom-legalize bitcasts from fixed-length vector types to illegal
15766 // scalar types in order to improve codegen. Bitcast the vector to a
15767 // one-element vector type whose element type is the same as the result
15768 // type, and extract the first element.
15769 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
15770 if (isTypeLegal(BVT)) {
15771 SDValue BVec = DAG.getBitcast(BVT, Op0);
15772 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
15773 }
15774 }
15775 break;
15776 }
15777 case ISD::BITREVERSE: {
15778 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
15779 "Unexpected custom legalisation");
15780 MVT XLenVT = Subtarget.getXLenVT();
15781 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15782 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
15783 // ReplaceNodeResults requires we maintain the same type for the return
15784 // value.
15785 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
15786 break;
15787 }
15788 case RISCVISD::BREV8:
15789 case RISCVISD::ORC_B: {
15790 MVT VT = N->getSimpleValueType(0);
15791 MVT XLenVT = Subtarget.getXLenVT();
15792 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
15793 "Unexpected custom legalisation");
15794 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
15795 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
15796 "Unexpected extension");
15797 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15798 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
15799 // ReplaceNodeResults requires we maintain the same type for the return
15800 // value.
15801 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
15802 break;
15803 }
15804 case RISCVISD::ASUB:
15805 case RISCVISD::ASUBU:
15806 case RISCVISD::MULHSU:
15807 case RISCVISD::MULHR:
15808 case RISCVISD::MULHRU:
15809 case RISCVISD::MULHRSU: {
15810 MVT VT = N->getSimpleValueType(0);
15811 SDValue Op0 = N->getOperand(0);
15812 SDValue Op1 = N->getOperand(1);
15813 unsigned Opcode = N->getOpcode();
15814 // PMULH* variants don't support i8
15815 [[maybe_unused]] bool IsMulH =
15816 Opcode == RISCVISD::MULHSU || Opcode == RISCVISD::MULHR ||
15817 Opcode == RISCVISD::MULHRU || Opcode == RISCVISD::MULHRSU;
15818 assert(VT == MVT::v2i16 || (!IsMulH && VT == MVT::v4i8));
15819 MVT NewVT = MVT::v4i16;
15820 if (VT == MVT::v4i8)
15821 NewVT = MVT::v8i8;
15822 SDValue Undef = DAG.getUNDEF(VT);
15823 Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
15824 Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
15825 Results.push_back(DAG.getNode(Opcode, DL, NewVT, {Op0, Op1}));
15826 return;
15827 }
15829 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
15830 // type is illegal (currently only vXi64 RV32).
15831 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
15832 // transferred to the destination register. We issue two of these from the
15833 // upper- and lower- halves of the SEW-bit vector element, slid down to the
15834 // first element.
15835 SDValue Vec = N->getOperand(0);
15836 SDValue Idx = N->getOperand(1);
15837
15838 // The vector type hasn't been legalized yet so we can't issue target
15839 // specific nodes if it needs legalization.
15840 // FIXME: We would manually legalize if it's important.
15841 if (!isTypeLegal(Vec.getValueType()))
15842 return;
15843
15844 MVT VecVT = Vec.getSimpleValueType();
15845
15846 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
15847 VecVT.getVectorElementType() == MVT::i64 &&
15848 "Unexpected EXTRACT_VECTOR_ELT legalization");
15849
15850 // If this is a fixed vector, we need to convert it to a scalable vector.
15851 MVT ContainerVT = VecVT;
15852 if (VecVT.isFixedLengthVector()) {
15853 ContainerVT = getContainerForFixedLengthVector(VecVT);
15854 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
15855 }
15856
15857 MVT XLenVT = Subtarget.getXLenVT();
15858
15859 // Use a VL of 1 to avoid processing more elements than we need.
15860 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
15861
15862 // Unless the index is known to be 0, we must slide the vector down to get
15863 // the desired element into index 0.
15864 if (!isNullConstant(Idx)) {
15865 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
15866 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
15867 }
15868
15869 // Extract the lower XLEN bits of the correct vector element.
15870 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15871
15872 // To extract the upper XLEN bits of the vector element, shift the first
15873 // element right by 32 bits and re-extract the lower XLEN bits.
15874 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
15875 DAG.getUNDEF(ContainerVT),
15876 DAG.getConstant(32, DL, XLenVT), VL);
15877 SDValue LShr32 =
15878 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
15879 DAG.getUNDEF(ContainerVT), Mask, VL);
15880
15881 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15882
15883 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15884 break;
15885 }
15887 unsigned IntNo = N->getConstantOperandVal(0);
15888 switch (IntNo) {
15889 default:
15891 "Don't know how to custom type legalize this intrinsic!");
15892 case Intrinsic::experimental_get_vector_length: {
15893 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
15894 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15895 return;
15896 }
15897 case Intrinsic::experimental_cttz_elts: {
15898 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
15899 Results.push_back(DAG.getZExtOrTrunc(Res, DL, N->getValueType(0)));
15900 return;
15901 }
15902 case Intrinsic::riscv_orc_b:
15903 case Intrinsic::riscv_brev8:
15904 case Intrinsic::riscv_sha256sig0:
15905 case Intrinsic::riscv_sha256sig1:
15906 case Intrinsic::riscv_sha256sum0:
15907 case Intrinsic::riscv_sha256sum1:
15908 case Intrinsic::riscv_sm3p0:
15909 case Intrinsic::riscv_sm3p1: {
15910 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15911 return;
15912 unsigned Opc;
15913 switch (IntNo) {
15914 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15915 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15916 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15917 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15918 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15919 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15920 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15921 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15922 }
15923
15924 SDValue NewOp =
15925 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15926 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15927 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15928 return;
15929 }
15930 case Intrinsic::riscv_sm4ks:
15931 case Intrinsic::riscv_sm4ed: {
15932 unsigned Opc =
15933 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15934 SDValue NewOp0 =
15935 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15936 SDValue NewOp1 =
15937 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15938 SDValue Res =
15939 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15940 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15941 return;
15942 }
15943 case Intrinsic::riscv_mopr: {
15944 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15945 return;
15946 SDValue NewOp =
15947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15948 SDValue Res = DAG.getNode(
15949 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15950 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15951 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15952 return;
15953 }
15954 case Intrinsic::riscv_moprr: {
15955 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15956 return;
15957 SDValue NewOp0 =
15958 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15959 SDValue NewOp1 =
15960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15961 SDValue Res = DAG.getNode(
15962 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15963 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15965 return;
15966 }
15967 case Intrinsic::riscv_clmulh:
15968 case Intrinsic::riscv_clmulr: {
15969 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15970 return;
15971
15972 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15973 // to the full 128-bit clmul result of multiplying two xlen values.
15974 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15975 // upper 32 bits.
15976 //
15977 // The alternative is to mask the inputs to 32 bits and use clmul, but
15978 // that requires two shifts to mask each input without zext.w.
15979 // FIXME: If the inputs are known zero extended or could be freely
15980 // zero extended, the mask form would be better.
15981 SDValue NewOp0 =
15982 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15983 SDValue NewOp1 =
15984 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15985 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15986 DAG.getConstant(32, DL, MVT::i64));
15987 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15988 DAG.getConstant(32, DL, MVT::i64));
15989 unsigned Opc =
15990 IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
15991 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15992 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15993 DAG.getConstant(32, DL, MVT::i64));
15994 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15995 return;
15996 }
15997 case Intrinsic::riscv_vmv_x_s: {
15998 EVT VT = N->getValueType(0);
15999 MVT XLenVT = Subtarget.getXLenVT();
16000 if (VT.bitsLT(XLenVT)) {
16001 // Simple case just extract using vmv.x.s and truncate.
16002 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
16003 Subtarget.getXLenVT(), N->getOperand(1));
16004 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
16005 return;
16006 }
16007
16008 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
16009 "Unexpected custom legalization");
16010
16011 // We need to do the move in two steps.
16012 SDValue Vec = N->getOperand(1);
16013 MVT VecVT = Vec.getSimpleValueType();
16014
16015 // First extract the lower XLEN bits of the element.
16016 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
16017
16018 // To extract the upper XLEN bits of the vector element, shift the first
16019 // element right by 32 bits and re-extract the lower XLEN bits.
16020 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
16021
16022 SDValue ThirtyTwoV =
16023 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
16024 DAG.getConstant(32, DL, XLenVT), VL);
16025 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
16026 DAG.getUNDEF(VecVT), Mask, VL);
16027 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
16028
16029 Results.push_back(
16030 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
16031 break;
16032 }
16033 }
16034 break;
16035 }
16036 case ISD::VECREDUCE_ADD:
16037 case ISD::VECREDUCE_AND:
16038 case ISD::VECREDUCE_OR:
16039 case ISD::VECREDUCE_XOR:
16044 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
16045 Results.push_back(V);
16046 break;
16047 case ISD::VP_REDUCE_ADD:
16048 case ISD::VP_REDUCE_AND:
16049 case ISD::VP_REDUCE_OR:
16050 case ISD::VP_REDUCE_XOR:
16051 case ISD::VP_REDUCE_SMAX:
16052 case ISD::VP_REDUCE_UMAX:
16053 case ISD::VP_REDUCE_SMIN:
16054 case ISD::VP_REDUCE_UMIN:
16055 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
16056 Results.push_back(V);
16057 break;
16058 case ISD::GET_ROUNDING: {
16059 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
16060 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
16061 Results.push_back(Res.getValue(0));
16062 Results.push_back(Res.getValue(1));
16063 break;
16064 }
16065 }
16066}
16067
16068/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
16069/// which corresponds to it.
16070static unsigned getVecReduceOpcode(unsigned Opc) {
16071 switch (Opc) {
16072 default:
16073 llvm_unreachable("Unhandled binary to transform reduction");
16074 case ISD::ADD:
16075 return ISD::VECREDUCE_ADD;
16076 case ISD::UMAX:
16077 return ISD::VECREDUCE_UMAX;
16078 case ISD::SMAX:
16079 return ISD::VECREDUCE_SMAX;
16080 case ISD::UMIN:
16081 return ISD::VECREDUCE_UMIN;
16082 case ISD::SMIN:
16083 return ISD::VECREDUCE_SMIN;
16084 case ISD::AND:
16085 return ISD::VECREDUCE_AND;
16086 case ISD::OR:
16087 return ISD::VECREDUCE_OR;
16088 case ISD::XOR:
16089 return ISD::VECREDUCE_XOR;
16090 case ISD::FADD:
16091 // Note: This is the associative form of the generic reduction opcode.
16092 return ISD::VECREDUCE_FADD;
16093 case ISD::FMAXNUM:
16094 return ISD::VECREDUCE_FMAX;
16095 case ISD::FMINNUM:
16096 return ISD::VECREDUCE_FMIN;
16097 }
16098}
16099
/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
static SDValue
// NOTE(review): the line naming this function and its leading parameters
// (presumably `combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                  const RISCVSubtarget &Subtarget) {

  // This transform needs to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  // NOTE(review): the guard condition for the following early return is
  // missing from this excerpt -- confirm against upstream.
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  const SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // Integer binops are handled unconditionally; FP binops need extra checks.
  if (!VT.isInteger()) {
    switch (Opc) {
    default:
      return SDValue();
    case ISD::FADD:
      // For FADD, we only handle the case with reassociation allowed. We
      // could handle strict reduction order, but at the moment, there's no
      // known reason to, and the complexity isn't worth it.
      if (!N->getFlags().hasAllowReassociation())
        return SDValue();
      break;
    case ISD::FMAXNUM:
    case ISD::FMINNUM:
      break;
    }
  }

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Both operands must be single use, otherwise forming the reduction would
  // duplicate work.
  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  // Canonicalize any extract_vector_elt operand to the RHS.
  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  // Elements wider than ELEN can't be processed by the vector unit.
  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  // reduce_op (extract_subvector [2 x VT] from V). This will form the
  // root of our reduction tree. TODO: We could extend this to any two
  // adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //              (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type. If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
    // Intersect the flags of the old reduction and the binop so the new
    // node only keeps what both allowed.
    return DAG.getNode(ReduceOpc, DL, VT, Vec,
                       ReduceVec->getFlags() & N->getFlags());
  }

  return SDValue();
}
16206
16207
// Try to fold (<bop> x, (reduction.<bop> vec, start))
// into (reduction.<bop> vec, (<bop> x, start)), i.e. fold the scalar binop
// into the reduction's start value when the current start is the neutral
// element.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                const RISCVSubtarget &Subtarget) {
  // Map a scalar binop opcode to the VL-predicated RVV reduction node that
  // could have produced one of its operands.
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  // An operand participates if it extracts element 0 of the matching RVV
  // reduction node.
  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip FADD when reassociation is disallowed, since this combine needs to
  // reassociate the operation into the reduction's start value.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  // Both the extract and the reduction must be single use so we can replace
  // them without duplicating work.
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  // Operand 2 of the VL reduction is the start-value vector; look through an
  // INSERT_SUBVECTOR into undef at index 0 (restored below).
  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check the scalar of ScalarV is neutral element
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  // The other binop operand becomes the reduction's new start value.
  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);

  // Rebuild the reduction with the new start vector in operand 2; all other
  // operands (passthru, vec, mask, policy, AVL) are unchanged.
  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}
16311
// Optimize (add (shl x, c0), (shl y, c1)) ->
// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
// or
// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue transformAddShlImm(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
  // extension.
  if (!Subtarget.hasShlAdd(3))
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  // Both shift amounts must be strictly positive.
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // The difference of the shift amounts selects which shNadd (or wider
  // shladd) form the target must support.
  int64_t Diff = std::abs(C0 - C1);
  if (!Subtarget.hasShlAdd(Diff))
    return SDValue();

  // Build nodes.
  SDLoc DL(N);
  // NS is the value shifted by the smaller amount, NL by the larger; the
  // result is (shl (shl_add NL, Diff, NS), min(C0, C1)).
  int64_t Bits = std::min(C0, C1);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
                              DAG.getTargetConstant(Diff, DL, VT), NS);
  return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
}
16358
// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
// or 3. If so, rewrite (add (add (shl X, c), C1), Other) as
// (add (shl_add X, c, Other), C1) so a SH*ADD instruction can be selected.
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI,
// SDValue Other,`) is missing from this excerpt -- confirm against upstream.
                                     SelectionDAG &DAG) {
  using namespace llvm::SDPatternMatch;

  // Looking for a reg-reg add and not an addi.
  if (isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // Based on testing it seems that performance degrades if the ADDI has
  // more than 2 uses.
  if (AddI->use_size() > 2)
    return SDValue();

  // Match AddI = (add (shl X, VShift), AddVal), requiring the shift to be
  // single use.
  APInt AddVal;
  SDValue SHLVal;
  if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
    return SDValue();

  APInt VShift;
  if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
    return SDValue();

  // Only shift amounts 1..3 have a SH1ADD/SH2ADD/SH3ADD form.
  if (VShift.slt(1) || VShift.sgt(3))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // The shift must be positive but the add can be signed.
  uint64_t ShlConst = VShift.getZExtValue();
  int64_t AddConst = AddVal.getSExtValue();

  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
                              DAG.getTargetConstant(ShlConst, DL, VT), Other);
  return DAG.getNode(ISD::ADD, DL, VT, SHADD,
                     DAG.getSignedConstant(AddConst, DL, VT));
}
16397
// Optimize (add (add (shl x, c0), c1), y) ->
// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG
// &DAG,`) is missing from this excerpt -- confirm against upstream.
                                 const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT != Subtarget.getXLenVT())
    return SDValue();

  // ADD is commutative, so try the shifted-add pattern in either operand
  // position.
  SDValue AddI = N->getOperand(0);
  SDValue Other = N->getOperand(1);
  if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
    return V;
  if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
    return V;
  return SDValue();
}
16419
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
// NOTE(review): the line naming this function and its leading parameters
// (presumably `static SDValue combineSelectAndUse(SDNode *N, SDValue Slct,
// SDValue OtherOp,`) is missing from this excerpt -- confirm against upstream.
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when condition code has multiple use.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  // Only fold a single-use generic SELECT or RISC-V SELECT_CC.
  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // True when N is the identity constant for this combine: -1 when
  // AllOnes, 0 otherwise.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
  // NOTE(review): the lambda body is missing from this excerpt (presumably
  // `return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);`) -- confirm
  // against upstream.
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before the true/false values.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
16493
16494 // Attempt combineSelectAndUse on each operand of a commutative operator N.
16496 bool AllOnes,
16497 const RISCVSubtarget &Subtarget) {
16498 SDValue N0 = N->getOperand(0);
16499 SDValue N1 = N->getOperand(1);
// Since N is commutative, the select may appear as either operand; try the
// (select, other) pairing in both orders and take the first fold that works.
16500 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
16501 return Result;
16502 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
16503 return Result;
16504 return SDValue();
16505}
16506
16507 // Transform (add (mul x, c0), c1) ->
16508 // (add (mul (add x, c1/c0), c0), c1%c0).
16509 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
16510 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
16511 // to an infinite loop in DAGCombine if transformed.
16512 // Or transform (add (mul x, c0), c1) ->
16513 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
16514 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
16515 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
16516 // lead to an infinite loop in DAGCombine if transformed.
16517 // Or transform (add (mul x, c0), c1) ->
16518 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
16519 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
16520 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
16521 // lead to an infinite loop in DAGCombine if transformed.
16522 // Or transform (add (mul x, c0), c1) ->
16523 // (mul (add x, c1/c0), c0).
16524 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
16526 const RISCVSubtarget &Subtarget) {
16527 // Skip for vector types and larger types.
16528 EVT VT = N->getValueType(0);
16529 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
16530 return SDValue();
16531 // The first operand node must be a MUL and has no other use.
16532 SDValue N0 = N->getOperand(0);
16533 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
16534 return SDValue();
16535 // Check if c0 and c1 match above conditions.
16536 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16537 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16538 if (!N0C || !N1C)
16539 return SDValue();
16540 // If N0C has multiple uses it's possible one of the cases in
16541 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
16542 // in an infinite loop.
16543 if (!N0C->hasOneUse())
16544 return SDValue();
16545 int64_t C0 = N0C->getSExtValue();
16546 int64_t C1 = N1C->getSExtValue();
16547 int64_t CA, CB;
// Bail on trivial multipliers (those get folded elsewhere) and on a c1 that
// already fits an ADDI immediate, where this transform gains nothing.
16548 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
16549 return SDValue();
16550 // Search for proper CA (non-zero) and CB that both are simm12.
// In all three branches the invariant CA*C0 + CB == C1 holds (C++ signed
// division truncates toward zero), so the rewrite is value-preserving.
16551 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
16552 !isInt<12>(C0 * (C1 / C0))) {
16553 CA = C1 / C0;
16554 CB = C1 % C0;
16555 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
16556 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
16557 CA = C1 / C0 + 1;
16558 CB = C1 % C0 - C0;
16559 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
16560 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
16561 CA = C1 / C0 - 1;
16562 CB = C1 % C0 + C0;
16563 } else
16564 return SDValue();
16565 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
16566 SDLoc DL(N);
16567 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
16568 DAG.getSignedConstant(CA, DL, VT));
16569 SDValue New1 =
16570 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
16571 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
16572}
16573
16574 // add (zext, zext) -> zext (add (zext, zext))
16575 // sub (zext, zext) -> sext (sub (zext, zext))
16576 // mul (zext, zext) -> zext (mul (zext, zext))
16577 // sdiv (zext, zext) -> zext (sdiv (zext, zext))
16578 // udiv (zext, zext) -> zext (udiv (zext, zext))
16579 // srem (zext, zext) -> zext (srem (zext, zext))
16580 // urem (zext, zext) -> zext (urem (zext, zext))
16581 //
16582 // where the sum of the extend widths match, and the range of the bin op
16583 // fits inside the width of the narrower bin op. (For profitability on rvv, we
16584 // use a power of two for both inner and outer extend.)
16586
16587 EVT VT = N->getValueType(0);
16588 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
16589 return SDValue();
16590
16591 SDValue N0 = N->getOperand(0);
16592 SDValue N1 = N->getOperand(1);
16594 return SDValue();
16595 if (!N0.hasOneUse() || !N1.hasOneUse())
16596 return SDValue();
16597
16598 SDValue Src0 = N0.getOperand(0);
16599 SDValue Src1 = N1.getOperand(0);
16600 EVT SrcVT = Src0.getValueType();
// Both sources must share a legal type that is strictly narrower than half
// the result width, so the half-width intermediate op below cannot overflow.
16601 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
16602 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
16603 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
16604 return SDValue();
16605
16606 LLVMContext &C = *DAG.getContext();
16608 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
16609
// Re-extend the sources only to the half-width type; the final extend to VT
// is emitted below.
16610 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
16611 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
16612
16613 // Src0 and Src1 are zero extended, so they're always positive if signed.
16614 //
16615 // sub can produce a negative from two positive operands, so it needs sign
16616 // extended. Other nodes produce a positive from two positive operands, so
16617 // zero extend instead.
16618 unsigned OuterExtend =
16619 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
16620
16621 return DAG.getNode(
16622 OuterExtend, SDLoc(N), VT,
16623 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
16624}
16625
16626 // Try to turn (add (xor bool, 1) -1) into (neg bool).
// For bool in {0, 1}: (xor bool, 1) == 1 - bool, so adding -1 yields -bool.
16628 SDValue N0 = N->getOperand(0);
16629 SDValue N1 = N->getOperand(1);
16630 EVT VT = N->getValueType(0);
16631 SDLoc DL(N);
16632
16633 // RHS should be -1.
16634 if (!isAllOnesConstant(N1))
16635 return SDValue();
16636
16637 // Look for (xor X, 1).
16638 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
16639 return SDValue();
16640
16641 // First xor input should be 0 or 1.
16643 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
16644 return SDValue();
16645
16646 // Emit a negate of the setcc.
16647 return DAG.getNegative(N0.getOperand(0), DL, VT);
16648}
16649
// Top-level DAG combine for ISD::ADD: tries each specialized add fold in
// turn and returns the first one that succeeds.
16652 const RISCVSubtarget &Subtarget) {
16653 SelectionDAG &DAG = DCI.DAG;
16654 if (SDValue V = combineAddOfBooleanXor(N, DAG))
16655 return V;
16656 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
16657 return V;
// These shift-based reassociations are deferred until after legalization and
// are skipped when invoked from the legalizer itself.
16658 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
16659 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
16660 return V;
16661 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
16662 return V;
16663 }
16664 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16665 return V;
16666 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16667 return V;
16668 if (SDValue V = combineBinOpOfZExt(N, DAG))
16669 return V;
16670
16671 // fold (add (select lhs, rhs, cc, 0, y), x) ->
16672 // (select lhs, rhs, cc, x, (add x, y))
16673 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16674}
16675
16676 // Try to turn a sub boolean RHS and constant LHS into an addi.
16678 SDValue N0 = N->getOperand(0);
16679 SDValue N1 = N->getOperand(1);
16680 EVT VT = N->getValueType(0);
16681 SDLoc DL(N);
16682
16683 // Require a constant LHS.
16684 auto *N0C = dyn_cast<ConstantSDNode>(N0);
16685 if (!N0C)
16686 return SDValue();
16687
16688 // All our optimizations involve subtracting 1 from the immediate and forming
16689 // an ADDI. Make sure the new immediate is valid for an ADDI.
16690 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
16691 if (!ImmValMinus1.isSignedIntN(12))
16692 return SDValue();
16693
16694 SDValue NewLHS;
16695 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
16696 // (sub constant, (setcc x, y, eq/neq)) ->
16697 // (add (setcc x, y, neq/eq), constant - 1)
// Valid because a setcc produces 0 or 1: C - s == (1 - s) + (C - 1), and
// inverting an equality setcc computes exactly 1 - s.
16698 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16699 EVT SetCCOpVT = N1.getOperand(0).getValueType();
16700 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
16701 return SDValue();
16702 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16703 NewLHS =
16704 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
16705 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
16706 N1.getOperand(0).getOpcode() == ISD::SETCC) {
16707 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
16708 // Since setcc returns a bool the xor is equivalent to 1-setcc.
16709 NewLHS = N1.getOperand(0);
16710 } else
16711 return SDValue();
16712
16713 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
16714 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
16715}
16716
16717 // Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
16718 // potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
16719 // is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
16720 // valid with Y=3, while 0b0000_1000_0000_0100 is not.
16722 if (!Subtarget.hasStdExtZbb())
16723 return SDValue();
16724
16725 EVT VT = N->getValueType(0);
16726
16727 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
16728 return SDValue();
16729
16730 SDValue N0 = N->getOperand(0);
16731 SDValue N1 = N->getOperand(1);
16732
16733 if (N0->getOpcode() != ISD::SHL)
16734 return SDValue();
16735
16736 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
16737 if (!ShAmtCLeft)
16738 return SDValue();
// ShiftedAmount is the Y from the comment above: left shift of 8-Y implies a
// matching right shift of Y.
16740 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
16741
16742 if (ShiftedAmount >= 8)
16743 return SDValue();
16744
16745 SDValue LeftShiftOperand = N0->getOperand(0);
16746 SDValue RightShiftOperand = N1;
16747
16748 if (ShiftedAmount != 0) { // Right operand must be a right shift.
16749 if (N1->getOpcode() != ISD::SRL)
16750 return SDValue();
16751 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
16752 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
16753 return SDValue();
16754 RightShiftOperand = N1.getOperand(0);
16755 }
16756
16757 // At least one shift should have a single use.
16758 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
16759 return SDValue();
16760
16761 if (LeftShiftOperand != RightShiftOperand)
16762 return SDValue();
16763
// Mask has exactly the Y-th bit of every byte set; X must be zero everywhere
// else for the orc.b replacement to be equivalent.
16764 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
16765 Mask <<= ShiftedAmount;
16766 // Check that X has indeed the right shape (only the Y-th bit can be set in
16767 // every byte).
16768 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
16769 return SDValue();
16770
16771 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
16772}
16773
16775 const RISCVSubtarget &Subtarget) {
16776 if (SDValue V = combineSubOfBoolean(N, DAG))
16777 return V;
16778
16779 EVT VT = N->getValueType(0);
16780 SDValue N0 = N->getOperand(0);
16781 SDValue N1 = N->getOperand(1);
16782 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
16783 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
16784 isNullConstant(N1.getOperand(1)) &&
16785 N1.getValueType() == N1.getOperand(0).getValueType()) {
16786 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16787 if (CCVal == ISD::SETLT) {
16788 SDLoc DL(N);
16789 unsigned ShAmt = N0.getValueSizeInBits() - 1;
16790 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
16791 DAG.getConstant(ShAmt, DL, VT));
16792 }
16793 }
16794
16795 if (SDValue V = combineBinOpOfZExt(N, DAG))
16796 return V;
16797 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
16798 return V;
16799
16800 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
16801 // (select lhs, rhs, cc, x, (sub x, y))
16802 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
16803}
16804
16805 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
16806 // Legalizing setcc can introduce xors like this. Doing this transform reduces
16807 // the number of xors and may allow the xor to fold into a branch condition.
16809 SDValue N0 = N->getOperand(0);
16810 SDValue N1 = N->getOperand(1);
16811 bool IsAnd = N->getOpcode() == ISD::AND;
16812
16813 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
16814 return SDValue();
16815
16816 if (!N0.hasOneUse() || !N1.hasOneUse())
16817 return SDValue();
16818
16819 SDValue N01 = N0.getOperand(1);
16820 SDValue N11 = N1.getOperand(1);
16821
16822 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
16823 // (xor X, -1) based on the upper bits of the other operand being 0. If the
16824 // operation is And, allow one of the Xors to use -1.
16825 if (isOneConstant(N01)) {
16826 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
16827 return SDValue();
16828 } else if (isOneConstant(N11)) {
16829 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
16830 if (!(IsAnd && isAllOnesConstant(N01)))
16831 return SDValue();
16832 } else
16833 return SDValue();
16834
16835 EVT VT = N->getValueType(0);
16836
16837 SDValue N00 = N0.getOperand(0);
16838 SDValue N10 = N1.getOperand(0);
16839
16840 // The LHS of the xors needs to be 0/1.
16842 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
16843 return SDValue();
16844
// De Morgan: (not A) op (not B) == not (A op' B), with the final "not"
// expressed as xor with 1 since both inputs are boolean-valued.
16845 // Invert the opcode and insert a new xor.
16846 SDLoc DL(N);
16847 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16848 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
16849 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
16850}
16851
16852 // Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
16853 // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
16854 // value to an unsigned value. This will be lowered to vmax and series of
16855 // vnclipu instructions later. This can be extended to other truncated types
16856 // other than i8 by replacing 256 and 255 with the equivalent constants for the
16857 // type.
16859 EVT VT = N->getValueType(0);
16860 SDValue N0 = N->getOperand(0);
16861 EVT SrcVT = N0.getValueType();
16862
16863 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16864 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
16865 return SDValue();
16866
16867 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
16868 return SDValue();
16869
16870 SDValue Cond = N0.getOperand(0);
16871 SDValue True = N0.getOperand(1);
16872 SDValue False = N0.getOperand(2);
16873
16874 if (Cond.getOpcode() != ISD::SETCC)
16875 return SDValue();
16876
16877 // FIXME: Support the version of this pattern with the select operands
16878 // swapped.
16879 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16880 if (CCVal != ISD::SETULT)
16881 return SDValue();
16882
16883 SDValue CondLHS = Cond.getOperand(0);
16884 SDValue CondRHS = Cond.getOperand(1);
16885
16886 if (CondLHS != True)
16887 return SDValue();
16888
16889 unsigned ScalarBits = VT.getScalarSizeInBits();
16890
// The compare bound must be exactly 2^ScalarBits (e.g. 256 when truncating
// to i8), matching the saturation limit of the destination type.
16891 // FIXME: Support other constants.
16892 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16893 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16894 return SDValue();
16895
16896 if (False.getOpcode() != ISD::SIGN_EXTEND)
16897 return SDValue();
16898
16899 False = False.getOperand(0);
16900
16901 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16902 return SDValue();
16903
16904 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16905 if (!FalseRHSC || !FalseRHSC->isZero())
16906 return SDValue();
16907
16908 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16909 if (CCVal2 != ISD::SETGT)
16910 return SDValue();
16911
16912 // Emit the signed to unsigned saturation pattern.
16913 SDLoc DL(N);
16914 SDValue Max =
16915 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16916 SDValue Min =
16917 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16918 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16919 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16920}
16921
16922 // Handle P extension truncate patterns:
16923 // ASUB/ASUBU: (trunc (srl (sub ([s|z]ext a), ([s|z]ext b)), 1))
16924 // MULHSU: (trunc (srl (mul (sext a), (zext b)), EltBits))
16925 // MULHR*: (trunc (srl (add (mul (sext a), (zext b)), round_const), EltBits))
16927 SDValue N0 = N->getOperand(0);
16928 EVT VT = N->getValueType(0);
16929 if (N0.getOpcode() != ISD::SRL)
16930 return SDValue();
16931
// Only the fixed vector types handled by the P extension lowering.
16933 MVT VecVT = VT.getSimpleVT();
16934 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
16935 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
16936 return SDValue();
16937
16938 // Check if shift amount is a splat constant
16939 SDValue ShAmt = N0.getOperand(1);
16940 if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
16941 return SDValue();
16942
16944 if (!BV)
16945 return SDValue();
16946 SDValue Splat = BV->getSplatValue();
16947 if (!Splat)
16948 return SDValue();
16950 if (!C)
16951 return SDValue();
16952
16953 SDValue Op = N0.getOperand(0);
16954 unsigned ShAmtVal = C->getZExtValue();
16955 unsigned EltBits = VecVT.getScalarSizeInBits();
16956
16957 // Check for rounding pattern: (add (mul ...), round_const)
// round_const must be the splat of 2^(EltBits-1), i.e. 0.5 ULP of the final
// truncated result; if matched, peel the add and remember to emit MULHR*.
16958 bool IsRounding = false;
16959 if (Op.getOpcode() == ISD::ADD && (EltBits == 16 || EltBits == 32)) {
16960 SDValue AddRHS = Op.getOperand(1);
16961 if (auto *RndBV = dyn_cast<BuildVectorSDNode>(AddRHS.getNode())) {
16962 if (auto *RndC =
16963 dyn_cast_or_null<ConstantSDNode>(RndBV->getSplatValue())) {
16964 uint64_t ExpectedRnd = 1ULL << (EltBits - 1);
16965 if (RndC->getZExtValue() == ExpectedRnd &&
16966 Op.getOperand(0).getOpcode() == ISD::MUL) {
16967 Op = Op.getOperand(0);
16968 IsRounding = true;
16969 }
16970 }
16971 }
16972 }
16973
16974 SDValue LHS = Op.getOperand(0);
16975 SDValue RHS = Op.getOperand(1);
16976
16977 bool LHSIsSExt = LHS.getOpcode() == ISD::SIGN_EXTEND;
16978 bool LHSIsZExt = LHS.getOpcode() == ISD::ZERO_EXTEND;
16979 bool RHSIsSExt = RHS.getOpcode() == ISD::SIGN_EXTEND;
16980 bool RHSIsZExt = RHS.getOpcode() == ISD::ZERO_EXTEND;
16981
16982 if (!(LHSIsSExt || LHSIsZExt) || !(RHSIsSExt || RHSIsZExt))
16983 return SDValue();
16984
16985 SDValue A = LHS.getOperand(0);
16986 SDValue B = RHS.getOperand(0);
16987
// The pre-extension sources must already be of the truncated result type.
16988 if (A.getValueType() != VT || B.getValueType() != VT)
16989 return SDValue();
16990
16991 unsigned Opc;
16992 switch (Op.getOpcode()) {
16993 default:
16994 return SDValue();
16995 case ISD::SUB:
16996 // PASUB/PASUBU: shift amount must be 1
16997 if (ShAmtVal != 1)
16998 return SDValue();
16999 if (LHSIsSExt && RHSIsSExt)
17000 Opc = RISCVISD::ASUB;
17001 else if (LHSIsZExt && RHSIsZExt)
17002 Opc = RISCVISD::ASUBU;
17003 else
17004 return SDValue();
17005 break;
17006 case ISD::MUL:
17007 // MULH*/MULHR*: shift amount must be element size, only for i16/i32
17008 if (ShAmtVal != EltBits || (EltBits != 16 && EltBits != 32))
17009 return SDValue();
17010 if (IsRounding) {
17011 if (LHSIsSExt && RHSIsSExt) {
17012 Opc = RISCVISD::MULHR;
17013 } else if (LHSIsZExt && RHSIsZExt) {
17014 Opc = RISCVISD::MULHRU;
17015 } else if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
17016 Opc = RISCVISD::MULHRSU;
// MULHRSU expects the sign-extended operand first; swap for the
17017 // commuted case
17018 if (LHSIsZExt && RHSIsSExt)
17019 std::swap(A, B);
17020 } else {
17021 return SDValue();
17022 }
17023 } else {
17024 if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
17025 Opc = RISCVISD::MULHSU;
17026 // commuted case
17027 if (LHSIsZExt && RHSIsSExt)
17028 std::swap(A, B);
17029 } else
17030 return SDValue();
17031 }
17032 break;
17033 }
17034
17035 return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
17036}
17037
// Top-level DAG combine for ISD::TRUNCATE.
17039 const RISCVSubtarget &Subtarget) {
17040 SDValue N0 = N->getOperand(0);
17041 EVT VT = N->getValueType(0);
17042
17043 if (VT.isFixedLengthVector() && Subtarget.hasStdExtP())
17044 return combinePExtTruncate(N, DAG, Subtarget);
17045
17046 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
17047 // extending X. This is safe since we only need the LSB after the shift and
17048 // shift amounts larger than 31 would produce poison. If we wait until
17049 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
17050 // to use a BEXT instruction.
17051 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
17052 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
17053 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
17054 SDLoc DL(N0);
17055 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17056 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17057 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
17058 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
17059 }
17060
17061 return combineTruncSelectToSMaxUSat(N, DAG);
17062}
17063
17064 // InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
17065 // truncation. But RVV doesn't have truncation instructions for more than twice
17066 // the bitwidth.
17067 //
17068 // E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
17069 //
17070 // vsetvli a0, zero, e32, m2, ta, ma
17071 // vnsrl.wi v12, v8, 0
17072 // vsetvli zero, zero, e16, m1, ta, ma
17073 // vnsrl.wi v8, v12, 0
17074 // vsetvli zero, zero, e8, mf2, ta, ma
17075 // vnsrl.wi v8, v8, 0
17076 //
17077 // So reverse the combine so we generate an vmseq/vmsne again:
17078 //
17079 // and (lshr (trunc X), ShAmt), 1
17080 // -->
17081 // zext (icmp ne (and X, (1 << ShAmt)), 0)
17082 //
17083 // and (lshr (not (trunc X)), ShAmt), 1
17084 // -->
17085 // zext (icmp eq (and X, (1 << ShAmt)), 0)
17087 const RISCVSubtarget &Subtarget) {
17088 using namespace SDPatternMatch;
17089 SDLoc DL(N);
17090
17091 if (!Subtarget.hasVInstructions())
17092 return SDValue();
17093
17094 EVT VT = N->getValueType(0);
17095 if (!VT.isVector())
17096 return SDValue();
17097
17098 APInt ShAmt;
17099 SDValue Inner;
17100 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
17101 m_One())))
17102 return SDValue();
17103
17104 SDValue X;
17105 bool IsNot;
17106 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
17107 IsNot = true;
17108 else if (sd_match(Inner, m_Trunc(m_Value(X))))
17109 IsNot = false;
17110 else
17111 return SDValue();
17112
// Only reverse the combine when the truncation narrows by more than a factor
// of two -- the single-step truncates are already cheap (see comment above).
17113 EVT WideVT = X.getValueType();
17114 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
17115 return SDValue();
17116
17117 SDValue Res =
17118 DAG.getNode(ISD::AND, DL, WideVT, X,
17119 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
17120 Res = DAG.getSetCC(DL,
17121 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17122 WideVT.getVectorElementCount()),
17123 Res, DAG.getConstant(0, DL, WideVT),
17124 IsNot ? ISD::SETEQ : ISD::SETNE);
17125 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
17126}
17127
17128 // (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
17129 // (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
17130 // (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
17131 // (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
17133 const RISCVSubtarget &Subtarget) {
17134 if (!Subtarget.hasCZEROLike())
17135 return SDValue();
17136
17137 SDValue N0 = N->getOperand(0);
17138 SDValue N1 = N->getOperand(1);
17139
17140 auto IsEqualCompZero = [](SDValue &V) -> bool {
17141 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
17142 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
17144 return true;
17145 }
17146 return false;
17147 };
17148
// Canonicalize so N0 is the single-use compare-with-zero.
17149 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
17150 std::swap(N0, N1);
17151 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
17152 return SDValue();
17153
// The other operand must be boolean-valued (known to be 0 or 1) for the
// AND to be equivalent to a conditional zero.
17154 KnownBits Known = DAG.computeKnownBits(N1);
17155 if (Known.getMaxValue().ugt(1))
17156 return SDValue();
17157
17158 unsigned CzeroOpcode =
17159 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
17160 ? RISCVISD::CZERO_EQZ
17161 : RISCVISD::CZERO_NEZ;
17162
17163 EVT VT = N->getValueType(0);
17164 SDLoc DL(N);
17165 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
17166}
17167
// Fold (and (atomic_load X), mask) into a zero-extending atomic load when
// the mask exactly covers the loaded width, eliminating the separate AND.
17170 SelectionDAG &DAG = DCI.DAG;
17171 if (N->getOpcode() != ISD::AND)
17172 return SDValue();
17173
17174 SDValue N0 = N->getOperand(0);
17175 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
17176 return SDValue();
17177 if (!N0.hasOneUse())
17178 return SDValue();
17179
17182 return SDValue();
17183
17184 EVT LoadedVT = ALoad->getMemoryVT();
17185 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
17186 if (!MaskConst)
17187 return SDValue();
17188 uint64_t Mask = MaskConst->getZExtValue();
// The AND is redundant only if the mask is exactly the loaded width's worth
// of trailing ones.
17189 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
17190 if (Mask != ExpectedMask)
17191 return SDValue();
17192
17193 SDValue ZextLoad = DAG.getAtomicLoad(
17194 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
17195 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
// Replace both the AND's value and the original load's chain output so all
// chain users see the new load.
17196 DCI.CombineTo(N, ZextLoad);
17197 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
17199 return SDValue(N, 0);
17200}
17201
17202 // Sometimes a mask is applied after a shift. If that shift was fed by a
17203 // load, there is sometimes the opportunity to narrow the load, which is
17204 // hidden by the intermediate shift. Detect that case and commute the
17205 // shift/and in order to enable load narrowing.
17207 EVT VT = N->getValueType(0);
17208 if (!VT.isScalarInteger())
17209 return SDValue();
17210
17211 using namespace SDPatternMatch;
17212 SDValue LoadNode;
17213 APInt MaskVal, ShiftVal;
17214 // (and (shl (load ...), ShiftAmt), Mask)
17215 if (!sd_match(
17217 m_ConstInt(ShiftVal))),
17218 m_ConstInt(MaskVal)))) {
17219 return SDValue();
17220 }
17221
17222 uint64_t ShiftAmt = ShiftVal.getZExtValue();
17223
17224 if (ShiftAmt >= VT.getSizeInBits())
17225 return SDValue();
17226
17227 // Calculate the appropriate mask if it were applied before the shift.
17228 APInt InnerMask = MaskVal.lshr(ShiftAmt);
// Narrowing only pays off when the pre-shift mask is a full i8/i16/i32 mask,
// i.e. the load can become a byte/half/word load.
17229 bool IsNarrowable =
17230 InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
17231
17232 if (!IsNarrowable)
17233 return SDValue();
17234
17235 // AND the loaded value and change the shift appropriately, allowing
17236 // the load to be narrowed.
17237 SDLoc DL(N);
17238 SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
17239 DAG.getConstant(InnerMask, DL, VT));
17240 return DAG.getNode(ISD::SHL, DL, VT, InnerAnd,
17241 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
17242}
17243
17244 // Combines two comparison operation and logic operation to one selection
17245 // operation(min, max) and logic operation. Returns new constructed Node if
17246 // conditions for optimization are satisfied.
17250 SelectionDAG &DAG = DCI.DAG;
17251 SDValue N0 = N->getOperand(0);
17252
17253 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
17254 // extending X. This is safe since we only need the LSB after the shift and
17255 // shift amounts larger than 31 would produce poison. If we wait until
17256 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
17257 // to use a BEXT instruction.
17258 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
17259 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
17260 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
17261 N0.hasOneUse()) {
17262 SDLoc DL(N);
17263 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17264 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17265 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
17266 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
17267 DAG.getConstant(1, DL, MVT::i64));
17268 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
17269 }
17270
// Try the remaining AND-specific folds in turn.
17272 return V;
17273 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
17274 return V;
17275 if (DCI.isAfterLegalizeDAG())
17276 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
17277 return V;
17278 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17279 return V;
17280 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17281 return V;
17282 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
17283 return V;
17284
17285 if (DCI.isAfterLegalizeDAG())
17286 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
17287 return V;
17288
17289 // fold (and (select lhs, rhs, cc, -1, y), x) ->
17290 // (select lhs, rhs, cc, x, (and x, y))
17291 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
17292}
17293
17294 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
17295 // FIXME: Generalize to other binary operators with same operand.
17297 SelectionDAG &DAG) {
17298 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
17299
17300 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
17301 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
17302 !N0.hasOneUse() || !N1.hasOneUse())
17303 return SDValue();
17304
17305 // Should have the same condition.
17306 SDValue Cond = N0.getOperand(1);
17307 if (Cond != N1.getOperand(1))
17308 return SDValue();
17309
17310 SDValue TrueV = N0.getOperand(0);
17311 SDValue FalseV = N1.getOperand(0);
17312
// Both arms must be (xor V, 1) with the same constant so the xor can be
// hoisted past the or.
17313 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
17314 TrueV.getOperand(1) != FalseV.getOperand(1) ||
17315 !isOneConstant(TrueV.getOperand(1)) ||
17316 !TrueV.hasOneUse() || !FalseV.hasOneUse())
17317 return SDValue();
17318
17319 EVT VT = N->getValueType(0);
17320 SDLoc DL(N);
17321
// Rebuild the czero pair on the un-xored values; exactly one of them is
// zeroed, so the or of the two is disjoint.
17322 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
17323 Cond);
17324 SDValue NewN1 =
17325 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
17326 SDValue NewOr =
17327 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
17328 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
17329}
17330
17331 // (xor X, (xor (and X, C2), Y))
17332 // ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
17333 // where C2 is a shifted mask with width = Width and shift = ShAmt
17334 // qc_insb might become qc.insb or qc.insbi depending on the operands.
17336 if (!Subtarget.hasVendorXqcibm())
17337 return SDValue();
17338
17339 using namespace SDPatternMatch;
17340 SDValue Base, Inserted;
17341 APInt CMask;
17342 if (!sd_match(N, m_Xor(m_Value(Base),
17345 m_ConstInt(CMask))),
17346 m_Value(Inserted))))))
17347 return SDValue();
17348
// The Xqcibm bitfield-insert instructions operate on i32 only.
17349 if (N->getValueType(0) != MVT::i32)
17350 return SDValue();
17351 unsigned Width, ShAmt;
17352 if (!CMask.isShiftedMask(ShAmt, Width))
17353 return SDValue();
17354
17355 // Check if all zero bits in CMask are also zero in Inserted
17356 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
17357 return SDValue();
17358
17359 SDLoc DL(N);
17360
17361 // `Inserted` needs to be right shifted before it is put into the
17362 // instruction.
17363 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
17364 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
17365
17366 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
17367 DAG.getConstant(ShAmt, DL, MVT::i32)};
17368 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17369}
17370
17372 const RISCVSubtarget &Subtarget) {
17373 if (!Subtarget.hasVendorXqcibm())
17374 return SDValue();
17375
17376 using namespace SDPatternMatch;
17377
17378 SDValue X;
17379 APInt MaskImm;
17380 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
17381 return SDValue();
17382
17383 unsigned ShAmt, Width;
17384 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
17385 return SDValue();
17386
17387 if (N->getValueType(0) != MVT::i32)
17388 return SDValue();
17389
17390 // If Zbs is enabled and it is a single bit set we can use BSETI which
17391 // can be compressed to C_BSETI when Xqcibm in enabled.
17392 if (Width == 1 && Subtarget.hasStdExtZbs())
17393 return SDValue();
17394
17395 // If C1 is a shifted mask (but can't be formed as an ORI),
17396 // use a bitfield insert of -1.
17397 // Transform (or x, C1)
17398 // -> (qc.insbi x, -1, width, shift)
17399 SDLoc DL(N);
17400
17401 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
17402 DAG.getConstant(Width, DL, MVT::i32),
17403 DAG.getConstant(ShAmt, DL, MVT::i32)};
17404 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17405}
17406
17407// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
17408// being inserted only sets known zero bits.
17410 const RISCVSubtarget &Subtarget) {
17411 // Supported only in Xqcibm for now.
17412 if (!Subtarget.hasVendorXqcibm())
17413 return SDValue();
17414
17415 using namespace SDPatternMatch;
17416
17417 SDValue Inserted;
17418 APInt MaskImm, OrImm;
17419 if (!sd_match(
17420 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
17421 m_ConstInt(MaskImm))),
17422 m_ConstInt(OrImm)))))
17423 return SDValue();
17424
17425 // Compute the Known Zero for the AND as this allows us to catch more general
17426 // cases than just looking for AND with imm.
17427 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
17428
17429 // The bits being inserted must only set those bits that are known to be
17430 // zero.
17431 if (!OrImm.isSubsetOf(Known.Zero)) {
17432 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
17433 // currently handle this case.
17434 return SDValue();
17435 }
17436
17437 unsigned ShAmt, Width;
17438 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
17439 if (!Known.Zero.isShiftedMask(ShAmt, Width))
17440 return SDValue();
17441
17442 // QC_INSB(I) dst, src, #width, #shamt.
17443 SDLoc DL(N);
17444
17445 SDValue ImmNode =
17446 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
17447
17448 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
17449 DAG.getConstant(ShAmt, DL, MVT::i32)};
17450 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17451}
17452
17454 const RISCVSubtarget &Subtarget) {
17455 SelectionDAG &DAG = DCI.DAG;
17456
17457 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
17458 return V;
17459 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17460 return V;
17461 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17462 return V;
17463
17464 if (DCI.isAfterLegalizeDAG()) {
17465 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
17466 return V;
17467 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
17468 return V;
17469 }
17470
17471 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
17472 // We may be able to pull a common operation out of the true and false value.
17473 SDValue N0 = N->getOperand(0);
17474 SDValue N1 = N->getOperand(1);
17475 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
17476 return V;
17477 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
17478 return V;
17479
17480 // fold (or (select cond, 0, y), x) ->
17481 // (select cond, x, (or x, y))
17482 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17483}
17484
17486 const RISCVSubtarget &Subtarget) {
17487 SDValue N0 = N->getOperand(0);
17488 SDValue N1 = N->getOperand(1);
17489
17490 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
17491 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
17492 // RISCVISD:::SLLW and we can't recover it to use a BSET instruction.
17493 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
17494 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
17495 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
17496 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
17497 SDLoc DL(N);
17498 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17499 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17500 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
17501 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
17502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
17503 }
17504
17505 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
17506 // NOTE: Assumes ROL being legal means ROLW is legal.
17507 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17508 if (N0.getOpcode() == RISCVISD::SLLW &&
17510 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
17511 SDLoc DL(N);
17512 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
17513 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
17514 }
17515
17516 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
17517 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
17518 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
17520 if (ConstN00 && CC == ISD::SETLT) {
17521 EVT VT = N0.getValueType();
17522 SDLoc DL(N0);
17523 const APInt &Imm = ConstN00->getAPIntValue();
17524 if ((Imm + 1).isSignedIntN(12))
17525 return DAG.getSetCC(DL, VT, N0.getOperand(1),
17526 DAG.getConstant(Imm + 1, DL, VT), CC);
17527 }
17528 }
17529
17530 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
17531 return V;
17532
17533 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17534 return V;
17535 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17536 return V;
17537
17538 // fold (xor (select cond, 0, y), x) ->
17539 // (select cond, x, (xor x, y))
17540 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17541}
17542
17543// Try to expand a multiply to a sequence of shifts and add/subs,
17544// for a machine without native mul instruction.
17546 uint64_t MulAmt) {
17547 SDLoc DL(N);
17548 EVT VT = N->getValueType(0);
17550
17551 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
17552 SDValue N0 = N->getOperand(0);
17553
17554 // Find the Non-adjacent form of the multiplier.
17555 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
17556 if (E & 1) {
17557 bool IsAdd = (E & 3) == 1;
17558 E -= IsAdd ? 1 : -1;
17559 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
17560 DAG.getShiftAmountConstant(I, VT, DL));
17561 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
17562 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
17563 }
17564 }
17565
17566 return Result;
17567}
17568
17569// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
17571 uint64_t MulAmt) {
17572 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
17573 SDValue X = N->getOperand(0);
17575 uint64_t ShiftAmt1;
17576 bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
17577 auto PreferSub = [X, MulAmtLowBit]() {
17578 // For MulAmt == 3 << M both (X << M + 2) - (X << M)
17579 // and (X << M + 1) + (X << M) are valid expansions.
17580 // Prefer SUB if we can get (X << M + 2) for free,
17581 // because X is exact (Y >> M + 2).
17582 uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
17583 using namespace SDPatternMatch;
17584 return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
17585 };
17586 if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
17587 Op = ISD::ADD;
17588 ShiftAmt1 = MulAmt - MulAmtLowBit;
17589 } else if (CanSub) {
17590 Op = ISD::SUB;
17591 ShiftAmt1 = MulAmt + MulAmtLowBit;
17592 } else {
17593 return SDValue();
17594 }
17595 EVT VT = N->getValueType(0);
17596 SDLoc DL(N);
17597 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
17598 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
17599 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
17600 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
17601 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
17602}
17603
17604static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
17605 unsigned ShY, bool AddX, unsigned Shift) {
17606 SDLoc DL(N);
17607 EVT VT = N->getValueType(0);
17608 SDValue X = N->getOperand(0);
17609 // Put the shift first if we can fold:
17610 // a. a zext into the shift forming a slli.uw
17611 // b. an exact shift right forming one shorter shift or no shift at all
17612 using namespace SDPatternMatch;
17613 if (Shift != 0 &&
17614 sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
17615 m_ExactSr(m_Value(), m_ConstInt())))) {
17616 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
17617 Shift = 0;
17618 }
17619 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17620 DAG.getTargetConstant(ShY, DL, VT), X);
17621 if (ShX != 0)
17622 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
17623 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
17624 if (Shift == 0)
17625 return ShlAdd;
17626 // Otherwise, put the shl last so that it can fold with following instructions
17627 // (e.g. sext or add).
17628 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
17629}
17630
17632 uint64_t MulAmt, unsigned Shift) {
17633 switch (MulAmt) {
17634 // 3/5/9 -> (shYadd X, X)
17635 case 3:
17636 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
17637 case 5:
17638 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
17639 case 9:
17640 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
17641 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
17642 case 5 * 3:
17643 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
17644 case 9 * 3:
17645 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
17646 case 5 * 5:
17647 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
17648 case 9 * 5:
17649 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
17650 case 9 * 9:
17651 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
17652 default:
17653 break;
17654 }
17655
17656 int ShX;
17657 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
17658 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
17659 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
17660 if (ShX <= 3)
17661 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
17662 // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
17663 if (Shift == 0) {
17664 SDLoc DL(N);
17665 EVT VT = N->getValueType(0);
17666 SDValue X = N->getOperand(0);
17667 SDValue Shl =
17668 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
17669 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
17670 DAG.getTargetConstant(ShY, DL, VT), Shl);
17671 return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
17672 }
17673 }
17674 return SDValue();
17675}
17676
17677// Try to expand a scalar multiply to a faster sequence.
17680 const RISCVSubtarget &Subtarget) {
17681
17682 EVT VT = N->getValueType(0);
17683
17684 // LI + MUL is usually smaller than the alternative sequence.
17686 return SDValue();
17687
17688 if (VT != Subtarget.getXLenVT())
17689 return SDValue();
17690
17691 bool ShouldExpandMul =
17692 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
17693 !Subtarget.hasStdExtZmmul();
17694 if (!ShouldExpandMul)
17695 return SDValue();
17696
17697 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17698 if (!CNode)
17699 return SDValue();
17700 uint64_t MulAmt = CNode->getZExtValue();
17701
17702 // Don't do this if the Xqciac extension is enabled and the MulAmt in simm12.
17703 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
17704 return SDValue();
17705
17706 // WARNING: The code below is knowingly incorrect with regards to undef
17707 // semantics. We're adding additional uses of X here, and in principle, we
17708 // should be freezing X before doing so. However, adding freeze here causes
17709 // real regressions, and no other target properly freezes X in these cases
17710 // either.
17711 if (Subtarget.hasShlAdd(3)) {
17712 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
17713 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
17714 // of 25 which happen to be quite common.
17715 // (2/4/8 * 3/5/9 + 1) * 2^N
17716 unsigned Shift = llvm::countr_zero(MulAmt);
17717 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
17718 return V;
17719
17720 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
17721 // shXadd. First check if this a sum of two power of 2s because that's
17722 // easy. Then count how many zeros are up to the first bit.
17723 SDValue X = N->getOperand(0);
17724 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
17725 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
17726 SDLoc DL(N);
17727 SDValue Shift1 =
17728 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17729 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17730 DAG.getTargetConstant(Shift, DL, VT), Shift1);
17731 }
17732
17733 // TODO: 2^(C1>3) * 3/5/9 - 1
17734
17735 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
17736 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
17737 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
17738 if (ScaleShift >= 1 && ScaleShift < 4) {
17739 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
17740 SDLoc DL(N);
17741 SDValue Shift1 =
17742 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17743 return DAG.getNode(
17744 ISD::ADD, DL, VT, Shift1,
17745 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17746 DAG.getTargetConstant(ScaleShift, DL, VT), X));
17747 }
17748 }
17749
17750 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
17751 for (uint64_t Offset : {3, 5, 9}) {
17752 if (isPowerOf2_64(MulAmt + Offset)) {
17753 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
17754 if (ShAmt >= VT.getSizeInBits())
17755 continue;
17756 SDLoc DL(N);
17757 SDValue Shift1 =
17758 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
17759 SDValue Mul359 =
17760 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17761 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
17762 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
17763 }
17764 }
17765 }
17766
17767 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
17768 return V;
17769
17770 if (!Subtarget.hasStdExtZmmul())
17771 return expandMulToNAFSequence(N, DAG, MulAmt);
17772
17773 return SDValue();
17774}
17775
17776// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
17777// (bitcast (sra (v2Xi16 (bitcast X)), 15))
17778// Same for other equivalent types with other equivalent constants.
17780 EVT VT = N->getValueType(0);
17781 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17782
17783 // Do this for legal vectors unless they are i1 or i8 vectors.
17784 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
17785 return SDValue();
17786
17787 if (N->getOperand(0).getOpcode() != ISD::AND ||
17788 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
17789 return SDValue();
17790
17791 SDValue And = N->getOperand(0);
17792 SDValue Srl = And.getOperand(0);
17793
17794 APInt V1, V2, V3;
17795 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
17796 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
17798 return SDValue();
17799
17800 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
17801 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
17802 V3 != (HalfSize - 1))
17803 return SDValue();
17804
17805 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
17806 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
17807 VT.getVectorElementCount() * 2);
17808 SDLoc DL(N);
17809 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
17810 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
17811 DAG.getConstant(HalfSize - 1, DL, HalfVT));
17812 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
17813}
17814
17817 const RISCVSubtarget &Subtarget) {
17818 EVT VT = N->getValueType(0);
17819 if (!VT.isVector())
17820 return expandMul(N, DAG, DCI, Subtarget);
17821
17822 SDLoc DL(N);
17823 SDValue N0 = N->getOperand(0);
17824 SDValue N1 = N->getOperand(1);
17825 SDValue MulOper;
17826 unsigned AddSubOpc;
17827
17828 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
17829 // (mul x, add (y, 1)) -> (add x, (mul x, y))
17830 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
17831 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
17832 auto IsAddSubWith1 = [&](SDValue V) -> bool {
17833 AddSubOpc = V->getOpcode();
17834 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
17835 SDValue Opnd = V->getOperand(1);
17836 MulOper = V->getOperand(0);
17837 if (AddSubOpc == ISD::SUB)
17838 std::swap(Opnd, MulOper);
17839 if (isOneOrOneSplat(Opnd))
17840 return true;
17841 }
17842 return false;
17843 };
17844
17845 if (IsAddSubWith1(N0)) {
17846 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
17847 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
17848 }
17849
17850 if (IsAddSubWith1(N1)) {
17851 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
17852 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
17853 }
17854
17855 if (SDValue V = combineBinOpOfZExt(N, DAG))
17856 return V;
17857
17859 return V;
17860
17861 return SDValue();
17862}
17863
17864/// According to the property that indexed load/store instructions zero-extend
17865/// their indices, try to narrow the type of index operand.
17866static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
17867 if (isIndexTypeSigned(IndexType))
17868 return false;
17869
17870 if (!N->hasOneUse())
17871 return false;
17872
17873 EVT VT = N.getValueType();
17874 SDLoc DL(N);
17875
17876 // In general, what we're doing here is seeing if we can sink a truncate to
17877 // a smaller element type into the expression tree building our index.
17878 // TODO: We can generalize this and handle a bunch more cases if useful.
17879
17880 // Narrow a buildvector to the narrowest element type. This requires less
17881 // work and less register pressure at high LMUL, and creates smaller constants
17882 // which may be cheaper to materialize.
17883 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
17884 KnownBits Known = DAG.computeKnownBits(N);
17885 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
17886 LLVMContext &C = *DAG.getContext();
17887 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
17888 if (ResultVT.bitsLT(VT.getVectorElementType())) {
17889 N = DAG.getNode(ISD::TRUNCATE, DL,
17890 VT.changeVectorElementType(C, ResultVT), N);
17891 return true;
17892 }
17893 }
17894
17895 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
17896 if (N.getOpcode() != ISD::SHL)
17897 return false;
17898
17899 SDValue N0 = N.getOperand(0);
17900 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
17901 N0.getOpcode() != RISCVISD::VZEXT_VL)
17902 return false;
17903 if (!N0->hasOneUse())
17904 return false;
17905
17906 APInt ShAmt;
17907 SDValue N1 = N.getOperand(1);
17908 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
17909 return false;
17910
17911 SDValue Src = N0.getOperand(0);
17912 EVT SrcVT = Src.getValueType();
17913 unsigned SrcElen = SrcVT.getScalarSizeInBits();
17914 unsigned ShAmtV = ShAmt.getZExtValue();
17915 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
17916 NewElen = std::max(NewElen, 8U);
17917
17918 // Skip if NewElen is not narrower than the original extended type.
17919 if (NewElen >= N0.getValueType().getScalarSizeInBits())
17920 return false;
17921
17922 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
17923 EVT NewVT = SrcVT.changeVectorElementType(*DAG.getContext(), NewEltVT);
17924
17925 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
17926 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
17927 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
17928 return true;
17929}
17930
17931/// Try to map an integer comparison with size > XLEN to vector instructions
17932/// before type legalization splits it up into chunks.
17933static SDValue
17935 const SDLoc &DL, SelectionDAG &DAG,
17936 const RISCVSubtarget &Subtarget) {
17937 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
17938
17939 if (!Subtarget.hasVInstructions())
17940 return SDValue();
17941
17942 MVT XLenVT = Subtarget.getXLenVT();
17943 EVT OpVT = X.getValueType();
17944 // We're looking for an oversized integer equality comparison.
17945 if (!OpVT.isScalarInteger())
17946 return SDValue();
17947
17948 unsigned OpSize = OpVT.getSizeInBits();
17949 // The size should be larger than XLen and smaller than the maximum vector
17950 // size.
17951 if (OpSize <= Subtarget.getXLen() ||
17952 OpSize > Subtarget.getRealMinVLen() *
17954 return SDValue();
17955
17956 // Don't perform this combine if constructing the vector will be expensive.
17957 auto IsVectorBitCastCheap = [](SDValue X) {
17959 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
17960 X.getOpcode() == ISD::LOAD;
17961 };
17962 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
17963 return SDValue();
17964
17966 Attribute::NoImplicitFloat))
17967 return SDValue();
17968
17969 // Bail out for non-byte-sized types.
17970 if (!OpVT.isByteSized())
17971 return SDValue();
17972
17973 unsigned VecSize = OpSize / 8;
17974 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
17975 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
17976
17977 SDValue VecX = DAG.getBitcast(VecVT, X);
17978 SDValue VecY = DAG.getBitcast(VecVT, Y);
17979 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
17980 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
17981
17982 SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
17983 return DAG.getSetCC(DL, VT,
17984 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
17985 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
17986 VL),
17987 DAG.getConstant(0, DL, XLenVT), CC);
17988}
17989
17992 const RISCVSubtarget &Subtarget) {
17993 SelectionDAG &DAG = DCI.DAG;
17994 SDLoc dl(N);
17995 SDValue N0 = N->getOperand(0);
17996 SDValue N1 = N->getOperand(1);
17997 EVT VT = N->getValueType(0);
17998 EVT OpVT = N0.getValueType();
17999
18000 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
18001 // Looking for an equality compare.
18002 if (!isIntEqualitySetCC(Cond))
18003 return SDValue();
18004
18005 if (SDValue V =
18006 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
18007 return V;
18008
18009 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
18010 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
18012 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
18013 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
18014 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
18015 AndRHSC.isNegatedPowerOf2()) {
18016 unsigned ShiftBits = AndRHSC.countr_zero();
18017 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
18018 DAG.getConstant(ShiftBits, dl, OpVT));
18019 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
18020 }
18021
18022 // Similar to above but handling the lower 32 bits by using sraiw. Allow
18023 // comparing with constants other than 0 if the constant can be folded into
18024 // addi or xori after shifting.
18025 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
18026 uint64_t AndRHSInt = AndRHSC.getZExtValue();
18027 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
18028 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
18029 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
18030 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
18031 if (NewC >= -2048 && NewC <= 2048) {
18032 SDValue SExt =
18033 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
18034 DAG.getValueType(MVT::i32));
18035 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
18036 DAG.getConstant(ShiftBits, dl, OpVT));
18037 return DAG.getSetCC(dl, VT, Shift,
18038 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
18039 }
18040 }
18041
18042 // Fold (and X, Mask) ==/!= C -> X ==/!= sext(C, countr_one(Mask)) if the
18043 // Mask is only clearing redundant sign bits.
18044 if (isMask_64(AndRHSInt)) {
18045 unsigned TrailingOnes = llvm::countr_one(AndRHSInt);
18046 unsigned N1Width = llvm::bit_width(N1Int);
18047 int64_t N1SExt = SignExtend64(N1Int, TrailingOnes);
18048 if (N1Width <= TrailingOnes && isInt<12>(N1SExt) &&
18049 DAG.ComputeMaxSignificantBits(N0.getOperand(0)) <= TrailingOnes)
18050 return DAG.getSetCC(dl, VT, N0.getOperand(0),
18051 DAG.getSignedConstant(N1SExt, dl, OpVT), Cond);
18052 }
18053 }
18054
18055 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
18056 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
18057 // bit 31. Same for setne. C1' may be cheaper to materialize and the
18058 // sext_inreg can become a sext.w instead of a shift pair.
18059 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
18060 return SDValue();
18061
18062 // RHS needs to be a constant.
18063 auto *N1C = dyn_cast<ConstantSDNode>(N1);
18064 if (!N1C)
18065 return SDValue();
18066
18067 // LHS needs to be (and X, 0xffffffff).
18068 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
18070 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
18071 return SDValue();
18072
18073 // Don't do this if the sign bit is provably zero, it will be turned back into
18074 // an AND.
18075 APInt SignMask = APInt::getOneBitSet(64, 31);
18076 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
18077 return SDValue();
18078
18079 const APInt &C1 = N1C->getAPIntValue();
18080
18081 // If the constant is larger than 2^32 - 1 it is impossible for both sides
18082 // to be equal.
18083 if (C1.getActiveBits() > 32)
18084 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
18085
18086 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
18087 N0.getOperand(0), DAG.getValueType(MVT::i32));
18088 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
18089 dl, OpVT), Cond);
18090}
18091
18092static SDValue
18094 const RISCVSubtarget &Subtarget) {
18095 SelectionDAG &DAG = DCI.DAG;
18096 SDValue Src = N->getOperand(0);
18097 EVT VT = N->getValueType(0);
18098 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18099 unsigned Opc = Src.getOpcode();
18100 SDLoc DL(N);
18101
18102 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
18103 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
18104 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
18105 Subtarget.hasStdExtZfhmin())
18106 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
18107
18108 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
18109 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
18110 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
18111 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
18112 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
18113 Src.getOperand(1));
18114
18115 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
18116 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
18117 return DAG.getNegative(Src, DL, VT);
18118
18119 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
18120 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
18121 isAllOnesConstant(Src.getOperand(1)) &&
18122 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
18123 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
18124 DAG.getAllOnesConstant(DL, VT));
18125
18126 return SDValue();
18127}
18128
18129namespace {
18130// Forward declaration of the structure holding the necessary information to
18131// apply a combine.
18132struct CombineResult;
18133
// Bit flags describing which kinds of extension an operand supports; the
// flags are combined as a bitmask.
enum ExtKind : uint8_t {
  ZExt = 1 << 0,    // zero extension
  SExt = 1 << 1,    // sign extension
  FPExt = 1 << 2,   // floating-point extension
  BF16Ext = 1 << 3  // extension from bf16
};
18140/// Helper class for folding sign/zero extensions.
18141/// In particular, this class is used for the following combines:
18142/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
18143/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18144/// mul | mul_vl -> vwmul(u) | vwmul_su
18145/// shl | shl_vl -> vwsll
18146/// fadd -> vfwadd | vfwadd_w
18147/// fsub -> vfwsub | vfwsub_w
18148/// fmul -> vfwmul
18149/// An object of this class represents an operand of the operation we want to
18150/// combine.
18151/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
18152/// NodeExtensionHelper for `a` and one for `b`.
18153///
18154/// This class abstracts away how the extension is materialized and
18155/// how its number of users affect the combines.
18156///
18157/// In particular:
18158/// - VWADD_W is conceptually == add(op0, sext(op1))
18159/// - VWADDU_W == add(op0, zext(op1))
18160/// - VWSUB_W == sub(op0, sext(op1))
18161/// - VWSUBU_W == sub(op0, zext(op1))
18162/// - VFWADD_W == fadd(op0, fpext(op1))
18163/// - VFWSUB_W == fsub(op0, fpext(op1))
18164/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
18165/// zext|sext(smaller_value).
18166struct NodeExtensionHelper {
18167 /// Records if this operand is like being zero extended.
18168 bool SupportsZExt;
18169 /// Records if this operand is like being sign extended.
18170 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
18171 /// instance, a splat constant (e.g., 3), would support being both sign and
18172 /// zero extended.
18173 bool SupportsSExt;
18174 /// Records if this operand is like being floating point extended.
18175 bool SupportsFPExt;
18176 /// Records if this operand is extended from bf16.
18177 bool SupportsBF16Ext;
18178 /// This boolean captures whether we care if this operand would still be
18179 /// around after the folding happens.
18180 bool EnforceOneUse;
18181 /// Original value that this NodeExtensionHelper represents.
18182 SDValue OrigOperand;
18183
18184 /// Get the value feeding the extension or the value itself.
18185 /// E.g., for zext(a), this would return a.
18186 SDValue getSource() const {
18187 switch (OrigOperand.getOpcode()) {
18188 case ISD::ZERO_EXTEND:
18189 case ISD::SIGN_EXTEND:
18190 case RISCVISD::VSEXT_VL:
18191 case RISCVISD::VZEXT_VL:
18192 case RISCVISD::FP_EXTEND_VL:
18193 return OrigOperand.getOperand(0);
18194 default:
18195 return OrigOperand;
18196 }
18197 }
18198
18199 /// Check if this instance represents a splat.
18200 bool isSplat() const {
18201 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
18202 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
18203 }
18204
18205 /// Get the extended opcode.
18206 unsigned getExtOpc(ExtKind SupportsExt) const {
18207 switch (SupportsExt) {
18208 case ExtKind::SExt:
18209 return RISCVISD::VSEXT_VL;
18210 case ExtKind::ZExt:
18211 return RISCVISD::VZEXT_VL;
18212 case ExtKind::FPExt:
18213 case ExtKind::BF16Ext:
18214 return RISCVISD::FP_EXTEND_VL;
18215 }
18216 llvm_unreachable("Unknown ExtKind enum");
18217 }
18218
  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
  SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget,
                                std::optional<ExtKind> SupportsExt) const {
    // No extension requested: hand the operand back untouched.
    if (!SupportsExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root, *SupportsExt);

    SDValue Source = getSource();
    assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
    // Already at the width the widening operation consumes; nothing to build.
    if (Source.getValueType() == NarrowVT)
      return Source;

    unsigned ExtOpc = getExtOpc(*SupportsExt);

    // If we need an extension, we should be changing the type.
    SDLoc DL(OrigOperand);
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
    case RISCVISD::FP_EXTEND_VL:
      // Re-extend the underlying source directly to the narrow type using the
      // VL form of the extension.
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case ISD::SPLAT_VECTOR:
      // Rebuild the splat at the narrow element type.
      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
    case RISCVISD::VMV_V_X_VL:
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
    case RISCVISD::VFMV_V_F_VL:
      // Peel off the scalar splat value and the fp_extend it carries, then
      // rebuild the splat at the narrow element type.
      Source = Source.getOperand(1);
      assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
      Source = Source.getOperand(0);
      assert(Source.getValueType() == NarrowVT.getVectorElementType());
      return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source, VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }
18266
18267 /// Helper function to get the narrow type for \p Root.
18268 /// The narrow type is the type of \p Root where we divided the size of each
18269 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
18270 /// \pre Both the narrow type and the original type should be legal.
18271 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
18272 MVT VT = Root->getSimpleValueType(0);
18273
18274 // Determine the narrow size.
18275 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
18276
18277 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
18278 : SupportsExt == ExtKind::FPExt
18279 ? MVT::getFloatingPointVT(NarrowSize)
18280 : MVT::getIntegerVT(NarrowSize);
18281
18282 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
18283 "Trying to extend something we can't represent");
18284 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
18285 return NarrowVT;
18286 }
18287
18288 /// Get the opcode to materialize:
18289 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
18290 static unsigned getSExtOpcode(unsigned Opcode) {
18291 switch (Opcode) {
18292 case ISD::ADD:
18293 case RISCVISD::ADD_VL:
18294 case RISCVISD::VWADD_W_VL:
18295 case RISCVISD::VWADDU_W_VL:
18296 case ISD::OR:
18297 case RISCVISD::OR_VL:
18298 return RISCVISD::VWADD_VL;
18299 case ISD::SUB:
18300 case RISCVISD::SUB_VL:
18301 case RISCVISD::VWSUB_W_VL:
18302 case RISCVISD::VWSUBU_W_VL:
18303 return RISCVISD::VWSUB_VL;
18304 case ISD::MUL:
18305 case RISCVISD::MUL_VL:
18306 return RISCVISD::VWMUL_VL;
18307 default:
18308 llvm_unreachable("Unexpected opcode");
18309 }
18310 }
18311
18312 /// Get the opcode to materialize:
18313 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
18314 static unsigned getZExtOpcode(unsigned Opcode) {
18315 switch (Opcode) {
18316 case ISD::ADD:
18317 case RISCVISD::ADD_VL:
18318 case RISCVISD::VWADD_W_VL:
18319 case RISCVISD::VWADDU_W_VL:
18320 case ISD::OR:
18321 case RISCVISD::OR_VL:
18322 return RISCVISD::VWADDU_VL;
18323 case ISD::SUB:
18324 case RISCVISD::SUB_VL:
18325 case RISCVISD::VWSUB_W_VL:
18326 case RISCVISD::VWSUBU_W_VL:
18327 return RISCVISD::VWSUBU_VL;
18328 case ISD::MUL:
18329 case RISCVISD::MUL_VL:
18330 return RISCVISD::VWMULU_VL;
18331 case ISD::SHL:
18332 case RISCVISD::SHL_VL:
18333 return RISCVISD::VWSLL_VL;
18334 default:
18335 llvm_unreachable("Unexpected opcode");
18336 }
18337 }
18338
18339 /// Get the opcode to materialize:
18340 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
18341 static unsigned getFPExtOpcode(unsigned Opcode) {
18342 switch (Opcode) {
18343 case RISCVISD::FADD_VL:
18344 case RISCVISD::VFWADD_W_VL:
18345 return RISCVISD::VFWADD_VL;
18346 case RISCVISD::FSUB_VL:
18347 case RISCVISD::VFWSUB_W_VL:
18348 return RISCVISD::VFWSUB_VL;
18349 case RISCVISD::FMUL_VL:
18350 return RISCVISD::VFWMUL_VL;
18351 case RISCVISD::VFMADD_VL:
18352 return RISCVISD::VFWMADD_VL;
18353 case RISCVISD::VFMSUB_VL:
18354 return RISCVISD::VFWMSUB_VL;
18355 case RISCVISD::VFNMADD_VL:
18356 return RISCVISD::VFWNMADD_VL;
18357 case RISCVISD::VFNMSUB_VL:
18358 return RISCVISD::VFWNMSUB_VL;
18359 default:
18360 llvm_unreachable("Unexpected opcode");
18361 }
18362 }
18363
18364 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
18365 /// newOpcode(a, b).
18366 static unsigned getSUOpcode(unsigned Opcode) {
18367 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
18368 "SU is only supported for MUL");
18369 return RISCVISD::VWMULSU_VL;
18370 }
18371
18372 /// Get the opcode to materialize
18373 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
18374 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
18375 switch (Opcode) {
18376 case ISD::ADD:
18377 case RISCVISD::ADD_VL:
18378 case ISD::OR:
18379 case RISCVISD::OR_VL:
18380 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
18381 : RISCVISD::VWADDU_W_VL;
18382 case ISD::SUB:
18383 case RISCVISD::SUB_VL:
18384 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
18385 : RISCVISD::VWSUBU_W_VL;
18386 case RISCVISD::FADD_VL:
18387 return RISCVISD::VFWADD_W_VL;
18388 case RISCVISD::FSUB_VL:
18389 return RISCVISD::VFWSUB_W_VL;
18390 default:
18391 llvm_unreachable("Unexpected opcode");
18392 }
18393 }
18394
18395 using CombineToTry = std::function<std::optional<CombineResult>(
18396 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
18397 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
18398 const RISCVSubtarget &)>;
18399
18400 /// Check if this node needs to be fully folded or extended for all users.
18401 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
18402
18403 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
18404 const RISCVSubtarget &Subtarget) {
18405 unsigned Opc = OrigOperand.getOpcode();
18406 MVT VT = OrigOperand.getSimpleValueType();
18407
18408 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
18409 "Unexpected Opcode");
18410
18411 // The pasthru must be undef for tail agnostic.
18412 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
18413 return;
18414
18415 // Get the scalar value.
18416 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
18417 : OrigOperand.getOperand(1);
18418
18419 // See if we have enough sign bits or zero bits in the scalar to use a
18420 // widening opcode by splatting to smaller element size.
18421 unsigned EltBits = VT.getScalarSizeInBits();
18422 unsigned ScalarBits = Op.getValueSizeInBits();
18423 // If we're not getting all bits from the element, we need special handling.
18424 if (ScalarBits < EltBits) {
18425 // This should only occur on RV32.
18426 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
18427 !Subtarget.is64Bit() && "Unexpected splat");
18428 // vmv.v.x sign extends narrow inputs.
18429 SupportsSExt = true;
18430
18431 // If the input is positive, then sign extend is also zero extend.
18432 if (DAG.SignBitIsZero(Op))
18433 SupportsZExt = true;
18434
18435 EnforceOneUse = false;
18436 return;
18437 }
18438
18439 unsigned NarrowSize = EltBits / 2;
18440 // If the narrow type cannot be expressed with a legal VMV,
18441 // this is not a valid candidate.
18442 if (NarrowSize < 8)
18443 return;
18444
18445 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
18446 SupportsSExt = true;
18447
18448 if (DAG.MaskedValueIsZero(Op,
18449 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
18450 SupportsZExt = true;
18451
18452 EnforceOneUse = false;
18453 }
18454
18455 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
18456 return (NarrowEltVT == MVT::f32 ||
18457 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
18458 }
18459
18460 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
18461 return NarrowEltVT == MVT::bf16 &&
18462 (Subtarget.hasStdExtZvfbfwma() || Subtarget.hasVInstructionsBF16());
18463 }
18464
18465 /// Helper method to set the various fields of this struct based on the
18466 /// type of \p Root.
18467 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
18468 const RISCVSubtarget &Subtarget) {
18469 SupportsZExt = false;
18470 SupportsSExt = false;
18471 SupportsFPExt = false;
18472 SupportsBF16Ext = false;
18473 EnforceOneUse = true;
18474 unsigned Opc = OrigOperand.getOpcode();
18475 // For the nodes we handle below, we end up using their inputs directly: see
18476 // getSource(). However since they either don't have a passthru or we check
18477 // that their passthru is undef, we can safely ignore their mask and VL.
18478 switch (Opc) {
18479 case ISD::ZERO_EXTEND:
18480 case ISD::SIGN_EXTEND: {
18481 MVT VT = OrigOperand.getSimpleValueType();
18482 if (!VT.isVector())
18483 break;
18484
18485 SDValue NarrowElt = OrigOperand.getOperand(0);
18486 MVT NarrowVT = NarrowElt.getSimpleValueType();
18487 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
18488 if (NarrowVT.getVectorElementType() == MVT::i1)
18489 break;
18490
18491 SupportsZExt = Opc == ISD::ZERO_EXTEND;
18492 SupportsSExt = Opc == ISD::SIGN_EXTEND;
18493 break;
18494 }
18495 case RISCVISD::VZEXT_VL:
18496 SupportsZExt = true;
18497 break;
18498 case RISCVISD::VSEXT_VL:
18499 SupportsSExt = true;
18500 break;
18501 case RISCVISD::FP_EXTEND_VL: {
18502 MVT NarrowEltVT =
18504 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
18505 SupportsFPExt = true;
18506 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
18507 SupportsBF16Ext = true;
18508
18509 break;
18510 }
18511 case ISD::SPLAT_VECTOR:
18512 case RISCVISD::VMV_V_X_VL:
18513 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
18514 break;
18515 case RISCVISD::VFMV_V_F_VL: {
18516 MVT VT = OrigOperand.getSimpleValueType();
18517
18518 if (!OrigOperand.getOperand(0).isUndef())
18519 break;
18520
18521 SDValue Op = OrigOperand.getOperand(1);
18522 if (Op.getOpcode() != ISD::FP_EXTEND)
18523 break;
18524
18525 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
18526 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
18527 if (NarrowSize != ScalarBits)
18528 break;
18529
18530 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
18531 SupportsFPExt = true;
18532 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
18533 Subtarget))
18534 SupportsBF16Ext = true;
18535 break;
18536 }
18537 default:
18538 break;
18539 }
18540 }
18541
18542 /// Check if \p Root supports any extension folding combines.
18543 static bool isSupportedRoot(const SDNode *Root,
18544 const RISCVSubtarget &Subtarget) {
18545 switch (Root->getOpcode()) {
18546 case ISD::ADD:
18547 case ISD::SUB:
18548 case ISD::MUL: {
18549 return Root->getValueType(0).isScalableVector();
18550 }
18551 case ISD::OR: {
18552 return Root->getValueType(0).isScalableVector() &&
18553 Root->getFlags().hasDisjoint();
18554 }
18555 // Vector Widening Integer Add/Sub/Mul Instructions
18556 case RISCVISD::ADD_VL:
18557 case RISCVISD::MUL_VL:
18558 case RISCVISD::VWADD_W_VL:
18559 case RISCVISD::VWADDU_W_VL:
18560 case RISCVISD::SUB_VL:
18561 case RISCVISD::VWSUB_W_VL:
18562 case RISCVISD::VWSUBU_W_VL:
18563 // Vector Widening Floating-Point Add/Sub/Mul Instructions
18564 case RISCVISD::FADD_VL:
18565 case RISCVISD::FSUB_VL:
18566 case RISCVISD::FMUL_VL:
18567 case RISCVISD::VFWADD_W_VL:
18568 case RISCVISD::VFWSUB_W_VL:
18569 return true;
18570 case RISCVISD::OR_VL:
18571 return Root->getFlags().hasDisjoint();
18572 case ISD::SHL:
18573 return Root->getValueType(0).isScalableVector() &&
18574 Subtarget.hasStdExtZvbb();
18575 case RISCVISD::SHL_VL:
18576 return Subtarget.hasStdExtZvbb();
18577 case RISCVISD::VFMADD_VL:
18578 case RISCVISD::VFNMSUB_VL:
18579 case RISCVISD::VFNMADD_VL:
18580 case RISCVISD::VFMSUB_VL:
18581 return true;
18582 default:
18583 return false;
18584 }
18585 }
18586
18587 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
18588 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
18589 const RISCVSubtarget &Subtarget) {
18590 assert(isSupportedRoot(Root, Subtarget) &&
18591 "Trying to build an helper with an "
18592 "unsupported root");
18593 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
18595 OrigOperand = Root->getOperand(OperandIdx);
18596
18597 unsigned Opc = Root->getOpcode();
18598 switch (Opc) {
18599 // We consider
18600 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
18601 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
18602 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
18603 case RISCVISD::VWADD_W_VL:
18604 case RISCVISD::VWADDU_W_VL:
18605 case RISCVISD::VWSUB_W_VL:
18606 case RISCVISD::VWSUBU_W_VL:
18607 case RISCVISD::VFWADD_W_VL:
18608 case RISCVISD::VFWSUB_W_VL:
18609 // Operand 1 can't be changed.
18610 if (OperandIdx == 1)
18611 break;
18612 [[fallthrough]];
18613 default:
18614 fillUpExtensionSupport(Root, DAG, Subtarget);
18615 break;
18616 }
18617 }
18618
18619 /// Helper function to get the Mask and VL from \p Root.
18620 static std::pair<SDValue, SDValue>
18621 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
18622 const RISCVSubtarget &Subtarget) {
18623 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
18624 switch (Root->getOpcode()) {
18625 case ISD::ADD:
18626 case ISD::SUB:
18627 case ISD::MUL:
18628 case ISD::OR:
18629 case ISD::SHL: {
18630 SDLoc DL(Root);
18631 MVT VT = Root->getSimpleValueType(0);
18632 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
18633 }
18634 default:
18635 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
18636 }
18637 }
18638
18639 /// Helper function to check if \p N is commutative with respect to the
18640 /// foldings that are supported by this class.
18641 static bool isCommutative(const SDNode *N) {
18642 switch (N->getOpcode()) {
18643 case ISD::ADD:
18644 case ISD::MUL:
18645 case ISD::OR:
18646 case RISCVISD::ADD_VL:
18647 case RISCVISD::MUL_VL:
18648 case RISCVISD::OR_VL:
18649 case RISCVISD::FADD_VL:
18650 case RISCVISD::FMUL_VL:
18651 case RISCVISD::VFMADD_VL:
18652 case RISCVISD::VFNMSUB_VL:
18653 case RISCVISD::VFNMADD_VL:
18654 case RISCVISD::VFMSUB_VL:
18655 return true;
18656 case RISCVISD::VWADD_W_VL:
18657 case RISCVISD::VWADDU_W_VL:
18658 case ISD::SUB:
18659 case RISCVISD::SUB_VL:
18660 case RISCVISD::VWSUB_W_VL:
18661 case RISCVISD::VWSUBU_W_VL:
18662 case RISCVISD::VFWADD_W_VL:
18663 case RISCVISD::FSUB_VL:
18664 case RISCVISD::VFWSUB_W_VL:
18665 case ISD::SHL:
18666 case RISCVISD::SHL_VL:
18667 return false;
18668 default:
18669 llvm_unreachable("Unexpected opcode");
18670 }
18671 }
18672
18673 /// Get a list of combine to try for folding extensions in \p Root.
18674 /// Note that each returned CombineToTry function doesn't actually modify
18675 /// anything. Instead they produce an optional CombineResult that if not None,
18676 /// need to be materialized for the combine to be applied.
18677 /// \see CombineResult::materialize.
18678 /// If the related CombineToTry function returns std::nullopt, that means the
18679 /// combine didn't match.
18681 getSupportedFoldings(const SDNode *Root, const RISCVSubtarget &Subtarget);
18682};
18683
18684/// Helper structure that holds all the necessary information to materialize a
18685/// combine that does some extension folding.
18686struct CombineResult {
18687 /// Opcode to be generated when materializing the combine.
18688 unsigned TargetOpcode;
18689 // No value means no extension is needed.
18690 std::optional<ExtKind> LHSExt;
18691 std::optional<ExtKind> RHSExt;
18692 /// Root of the combine.
18693 SDNode *Root;
18694 /// LHS of the TargetOpcode.
18695 NodeExtensionHelper LHS;
18696 /// RHS of the TargetOpcode.
18697 NodeExtensionHelper RHS;
18698
18699 CombineResult(unsigned TargetOpcode, SDNode *Root,
18700 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
18701 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
18702 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
18703 LHS(LHS), RHS(RHS) {}
18704
18705 /// Return a value that uses TargetOpcode and that can be used to replace
18706 /// Root.
18707 /// The actual replacement is *not* done in that method.
18708 SDValue materialize(SelectionDAG &DAG,
18709 const RISCVSubtarget &Subtarget) const {
18710 SDValue Mask, VL, Passthru;
18711 std::tie(Mask, VL) =
18712 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
18713 switch (Root->getOpcode()) {
18714 default:
18715 Passthru = Root->getOperand(2);
18716 break;
18717 case ISD::ADD:
18718 case ISD::SUB:
18719 case ISD::MUL:
18720 case ISD::OR:
18721 case ISD::SHL:
18722 Passthru = DAG.getUNDEF(Root->getValueType(0));
18723 break;
18724 }
18725 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
18726 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
18727 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
18728 Passthru, Mask, VL);
18729 }
18730};
18731
18732/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18733/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18734/// are zext) and LHS and RHS can be folded into Root.
18735/// AllowExtMask define which form `ext` can take in this pattern.
18736///
18737/// \note If the pattern can match with both zext and sext, the returned
18738/// CombineResult will feature the zext result.
18739///
18740/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18741/// can be used to apply the pattern.
18742static std::optional<CombineResult>
18743canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
18744 const NodeExtensionHelper &RHS,
18745 uint8_t AllowExtMask, SelectionDAG &DAG,
18746 const RISCVSubtarget &Subtarget) {
18747 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
18748 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18749 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18750 /*RHSExt=*/{ExtKind::ZExt});
18751 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
18752 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18753 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18754 /*RHSExt=*/{ExtKind::SExt});
18755 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
18756 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18757 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18758 /*RHSExt=*/{ExtKind::FPExt});
18759 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
18760 RHS.SupportsBF16Ext)
18761 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18762 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
18763 /*RHSExt=*/{ExtKind::BF16Ext});
18764 return std::nullopt;
18765}
18766
18767/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18768/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18769/// are zext) and LHS and RHS can be folded into Root.
18770///
18771/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18772/// can be used to apply the pattern.
18773static std::optional<CombineResult>
18774canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
18775 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18776 const RISCVSubtarget &Subtarget) {
18777 return canFoldToVWWithSameExtensionImpl(
18778 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
18779 Subtarget);
18780}
18781
18782/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
18783///
18784/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18785/// can be used to apply the pattern.
18786static std::optional<CombineResult>
18787canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18788 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18789 const RISCVSubtarget &Subtarget) {
18790 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
18791 Subtarget);
18792}
18793
18794/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
18795///
18796/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18797/// can be used to apply the pattern.
18798static std::optional<CombineResult>
18799canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
18800 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18801 const RISCVSubtarget &Subtarget) {
18802 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
18803 Subtarget);
18804}
18805
18806/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
18807///
18808/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18809/// can be used to apply the pattern.
18810static std::optional<CombineResult>
18811canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
18812 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18813 const RISCVSubtarget &Subtarget) {
18814 if (RHS.SupportsFPExt)
18815 return CombineResult(
18816 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
18817 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
18818
18819 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
18820 // sext/zext?
18821 // Control this behavior behind an option (AllowSplatInVW_W) for testing
18822 // purposes.
18823 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
18824 return CombineResult(
18825 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
18826 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
18827 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
18828 return CombineResult(
18829 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
18830 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
18831 return std::nullopt;
18832}
18833
18834/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
18835///
18836/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18837/// can be used to apply the pattern.
18838static std::optional<CombineResult>
18839canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18840 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18841 const RISCVSubtarget &Subtarget) {
18842 if (LHS.SupportsSExt)
18843 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18844 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18845 /*RHSExt=*/std::nullopt);
18846 return std::nullopt;
18847}
18848
18849/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
18850///
18851/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18852/// can be used to apply the pattern.
18853static std::optional<CombineResult>
18854canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18855 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18856 const RISCVSubtarget &Subtarget) {
18857 if (LHS.SupportsZExt)
18858 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18859 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18860 /*RHSExt=*/std::nullopt);
18861 return std::nullopt;
18862}
18863
18864/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
18865///
18866/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18867/// can be used to apply the pattern.
18868static std::optional<CombineResult>
18869canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18870 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18871 const RISCVSubtarget &Subtarget) {
18872 if (LHS.SupportsFPExt)
18873 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18874 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18875 /*RHSExt=*/std::nullopt);
18876 return std::nullopt;
18877}
18878
18879/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
18880///
18881/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18882/// can be used to apply the pattern.
18883static std::optional<CombineResult>
18884canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
18885 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18886 const RISCVSubtarget &Subtarget) {
18887
18888 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
18889 return std::nullopt;
18890 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
18891 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18892 /*RHSExt=*/{ExtKind::ZExt});
18893}
18894
18896NodeExtensionHelper::getSupportedFoldings(const SDNode *Root,
18897 const RISCVSubtarget &Subtarget) {
18898 SmallVector<CombineToTry> Strategies;
18899 switch (Root->getOpcode()) {
18900 case ISD::ADD:
18901 case ISD::SUB:
18902 case ISD::OR:
18903 case RISCVISD::ADD_VL:
18904 case RISCVISD::SUB_VL:
18905 case RISCVISD::OR_VL:
18906 case RISCVISD::FADD_VL:
18907 case RISCVISD::FSUB_VL:
18908 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
18909 Strategies.push_back(canFoldToVWWithSameExtension);
18910 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
18911 Strategies.push_back(canFoldToVW_W);
18912 break;
18913 case RISCVISD::FMUL_VL:
18914 case RISCVISD::VFMADD_VL:
18915 case RISCVISD::VFMSUB_VL:
18916 case RISCVISD::VFNMADD_VL:
18917 case RISCVISD::VFNMSUB_VL:
18918 Strategies.push_back(canFoldToVWWithSameExtension);
18919 if (Subtarget.hasStdExtZvfbfa() && Root->getOpcode() != RISCVISD::FMUL_VL)
18920 // TODO: Once other widen operations are supported we can merge
18921 // canFoldToVWWithSameExtension and canFoldToVWWithSameExtBF16.
18922 Strategies.push_back(canFoldToVWWithSameExtBF16);
18923 else if (Subtarget.hasStdExtZvfbfwma() &&
18924 Root->getOpcode() == RISCVISD::VFMADD_VL)
18925 Strategies.push_back(canFoldToVWWithSameExtBF16);
18926 break;
18927 case ISD::MUL:
18928 case RISCVISD::MUL_VL:
18929 // mul -> vwmul(u)
18930 Strategies.push_back(canFoldToVWWithSameExtension);
18931 // mul -> vwmulsu
18932 Strategies.push_back(canFoldToVW_SU);
18933 break;
18934 case ISD::SHL:
18935 case RISCVISD::SHL_VL:
18936 // shl -> vwsll
18937 Strategies.push_back(canFoldToVWWithSameExtZEXT);
18938 break;
18939 case RISCVISD::VWADD_W_VL:
18940 case RISCVISD::VWSUB_W_VL:
18941 // vwadd_w|vwsub_w -> vwadd|vwsub
18942 Strategies.push_back(canFoldToVWWithSEXT);
18943 break;
18944 case RISCVISD::VWADDU_W_VL:
18945 case RISCVISD::VWSUBU_W_VL:
18946 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
18947 Strategies.push_back(canFoldToVWWithZEXT);
18948 break;
18949 case RISCVISD::VFWADD_W_VL:
18950 case RISCVISD::VFWSUB_W_VL:
18951 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
18952 Strategies.push_back(canFoldToVWWithFPEXT);
18953 break;
18954 default:
18955 llvm_unreachable("Unexpected opcode");
18956 }
18957 return Strategies;
18958}
18959} // End anonymous namespace.
18960
18962 // TODO: Extend this to other binops using generic identity logic
18963 assert(N->getOpcode() == RISCVISD::ADD_VL);
18964 SDValue A = N->getOperand(0);
18965 SDValue B = N->getOperand(1);
18966 SDValue Passthru = N->getOperand(2);
18967 if (!Passthru.isUndef())
18968 // TODO:This could be a vmerge instead
18969 return SDValue();
18970 ;
18972 return A;
18973 // Peek through fixed to scalable
18974 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
18975 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
18976 return A;
18977 return SDValue();
18978}
18979
18980/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
18981/// The supported combines are:
18982/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
18983/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18984/// mul | mul_vl -> vwmul(u) | vwmul_su
18985/// shl | shl_vl -> vwsll
18986/// fadd_vl -> vfwadd | vfwadd_w
18987/// fsub_vl -> vfwsub | vfwsub_w
18988/// fmul_vl -> vfwmul
18989/// vwadd_w(u) -> vwadd(u)
18990/// vwsub_w(u) -> vwsub(u)
18991/// vfwadd_w -> vfwadd
18992/// vfwsub_w -> vfwsub
18995 const RISCVSubtarget &Subtarget) {
18996 SelectionDAG &DAG = DCI.DAG;
18997 if (DCI.isBeforeLegalize())
18998 return SDValue();
18999
19000 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
19001 return SDValue();
19002
19003 SmallVector<SDNode *> Worklist;
19004 SmallPtrSet<SDNode *, 8> Inserted;
19005 SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
19006 Worklist.push_back(N);
19007 Inserted.insert(N);
19008 SmallVector<CombineResult> CombinesToApply;
19009
19010 while (!Worklist.empty()) {
19011 SDNode *Root = Worklist.pop_back_val();
19012
19013 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
19014 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
19015 auto AppendUsersIfNeeded =
19016 [&Worklist, &Subtarget, &Inserted,
19017 &ExtensionsToRemove](const NodeExtensionHelper &Op) {
19018 if (Op.needToPromoteOtherUsers()) {
19019 // Remember that we're supposed to remove this extension.
19020 ExtensionsToRemove.insert(Op.OrigOperand.getNode());
19021 for (SDUse &Use : Op.OrigOperand->uses()) {
19022 SDNode *TheUser = Use.getUser();
19023 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
19024 return false;
19025 // We only support the first 2 operands of FMA.
19026 if (Use.getOperandNo() >= 2)
19027 return false;
19028 if (Inserted.insert(TheUser).second)
19029 Worklist.push_back(TheUser);
19030 }
19031 }
19032 return true;
19033 };
19034
19035 // Control the compile time by limiting the number of node we look at in
19036 // total.
19037 if (Inserted.size() > ExtensionMaxWebSize)
19038 return SDValue();
19039
19041 NodeExtensionHelper::getSupportedFoldings(Root, Subtarget);
19042
19043 assert(!FoldingStrategies.empty() && "Nothing to be folded");
19044 bool Matched = false;
19045 for (int Attempt = 0;
19046 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
19047 ++Attempt) {
19048
19049 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
19050 FoldingStrategies) {
19051 std::optional<CombineResult> Res =
19052 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
19053 if (Res) {
19054 // If this strategy wouldn't remove an extension we're supposed to
19055 // remove, reject it.
19056 if (!Res->LHSExt.has_value() &&
19057 ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
19058 continue;
19059 if (!Res->RHSExt.has_value() &&
19060 ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
19061 continue;
19062
19063 Matched = true;
19064 CombinesToApply.push_back(*Res);
19065 // All the inputs that are extended need to be folded, otherwise
19066 // we would be leaving the old input (since it is may still be used),
19067 // and the new one.
19068 if (Res->LHSExt.has_value())
19069 if (!AppendUsersIfNeeded(LHS))
19070 return SDValue();
19071 if (Res->RHSExt.has_value())
19072 if (!AppendUsersIfNeeded(RHS))
19073 return SDValue();
19074 break;
19075 }
19076 }
19077 std::swap(LHS, RHS);
19078 }
19079 // Right now we do an all or nothing approach.
19080 if (!Matched)
19081 return SDValue();
19082 }
19083 // Store the value for the replacement of the input node separately.
19084 SDValue InputRootReplacement;
19085 // We do the RAUW after we materialize all the combines, because some replaced
19086 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
19087 // some of these nodes may appear in the NodeExtensionHelpers of some of the
19088 // yet-to-be-visited CombinesToApply roots.
19090 ValuesToReplace.reserve(CombinesToApply.size());
19091 for (CombineResult Res : CombinesToApply) {
19092 SDValue NewValue = Res.materialize(DAG, Subtarget);
19093 if (!InputRootReplacement) {
19094 assert(Res.Root == N &&
19095 "First element is expected to be the current node");
19096 InputRootReplacement = NewValue;
19097 } else {
19098 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
19099 }
19100 }
19101 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
19102 DCI.CombineTo(OldNewValues.first.getNode(), OldNewValues.second);
19103 }
19104 return InputRootReplacement;
19105}
19106
19107// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
19108// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
19109// y will be the Passthru and cond will be the Mask.
19111 unsigned Opc = N->getOpcode();
19112 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
19113 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
19114
19115 SDValue Y = N->getOperand(0);
19116 SDValue MergeOp = N->getOperand(1);
19117 unsigned MergeOpc = MergeOp.getOpcode();
19118
19119 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
19120 return SDValue();
19121
19122 SDValue X = MergeOp->getOperand(1);
19123
19124 if (!MergeOp.hasOneUse())
19125 return SDValue();
19126
19127 // Passthru should be undef
19128 SDValue Passthru = N->getOperand(2);
19129 if (!Passthru.isUndef())
19130 return SDValue();
19131
19132 // Mask should be all ones
19133 SDValue Mask = N->getOperand(3);
19134 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
19135 return SDValue();
19136
19137 // False value of MergeOp should be all zeros
19138 SDValue Z = MergeOp->getOperand(2);
19139
19140 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
19141 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
19142 Z = Z.getOperand(1);
19143
19144 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
19145 return SDValue();
19146
19147 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
19148 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
19149 N->getFlags());
19150}
19151
19152// vwaddu C (vabd A B) -> vwabda(A B C)
19153// vwaddu C (vabdu A B) -> vwabdau(A B C)
19155 const RISCVSubtarget &Subtarget) {
19156 if (!Subtarget.hasStdExtZvabd())
19157 return SDValue();
19158
19159 MVT VT = N->getSimpleValueType(0);
19160 if (VT.getVectorElementType() != MVT::i8 &&
19161 VT.getVectorElementType() != MVT::i16)
19162 return SDValue();
19163
19164 SDValue Op0 = N->getOperand(0);
19165 SDValue Op1 = N->getOperand(1);
19166 SDValue Passthru = N->getOperand(2);
19167 if (!Passthru->isUndef())
19168 return SDValue();
19169
19170 SDValue Mask = N->getOperand(3);
19171 SDValue VL = N->getOperand(4);
19172 auto IsABD = [](SDValue Op) {
19173 if (Op->getOpcode() != RISCVISD::ABDS_VL &&
19174 Op->getOpcode() != RISCVISD::ABDU_VL)
19175 return SDValue();
19176 return Op;
19177 };
19178
19179 SDValue Diff = IsABD(Op0);
19180 Diff = Diff ? Diff : IsABD(Op1);
19181 if (!Diff)
19182 return SDValue();
19183 SDValue Acc = Diff == Op0 ? Op1 : Op0;
19184
19185 SDLoc DL(N);
19186 Acc = DAG.getNode(RISCVISD::VZEXT_VL, DL, VT, Acc, Mask, VL);
19187 SDValue Result = DAG.getNode(
19188 Diff.getOpcode() == RISCVISD::ABDS_VL ? RISCVISD::VWABDA_VL
19189 : RISCVISD::VWABDAU_VL,
19190 DL, VT, Diff.getOperand(0), Diff.getOperand(1), Acc, Mask, VL);
19191 return Result;
19192}
19193
19196 const RISCVSubtarget &Subtarget) {
19197 [[maybe_unused]] unsigned Opc = N->getOpcode();
19198 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
19199 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
19200
19201 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19202 return V;
19203
19204 return combineVWADDSUBWSelect(N, DCI.DAG);
19205}
19206
19207// Helper function for performMemPairCombine.
19208// Try to combine the memory loads/stores LSNode1 and LSNode2
19209// into a single memory pair operation.
19211 LSBaseSDNode *LSNode2, SDValue BasePtr,
19212 uint64_t Imm) {
19214 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
19215
19216 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
19217 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
19218 return SDValue();
19219
19221 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
19222
19223 // The new operation has twice the width.
19224 MVT XLenVT = Subtarget.getXLenVT();
19225 EVT MemVT = LSNode1->getMemoryVT();
19226 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
19227 MachineMemOperand *MMO = LSNode1->getMemOperand();
19229 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
19230
19231 if (LSNode1->getOpcode() == ISD::LOAD) {
19232 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
19233 unsigned Opcode;
19234 if (MemVT == MVT::i32)
19235 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
19236 else
19237 Opcode = RISCVISD::TH_LDD;
19238
19239 SDValue Res = DAG.getMemIntrinsicNode(
19240 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
19241 {LSNode1->getChain(), BasePtr,
19242 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
19243 NewMemVT, NewMMO);
19244
19245 SDValue Node1 =
19246 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
19247 SDValue Node2 =
19248 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
19249
19250 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
19251 return Node1;
19252 } else {
19253 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
19254
19255 SDValue Res = DAG.getMemIntrinsicNode(
19256 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
19257 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
19258 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
19259 NewMemVT, NewMMO);
19260
19261 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
19262 return Res;
19263 }
19264}
19265
19266// Try to combine two adjacent loads/stores to a single pair instruction from
19267// the XTHeadMemPair vendor extension.
19270 SelectionDAG &DAG = DCI.DAG;
19272 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
19273
19274 // Target does not support load/store pair.
19275 if (!Subtarget.hasVendorXTHeadMemPair())
19276 return SDValue();
19277
19278 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
19279 EVT MemVT = LSNode1->getMemoryVT();
19280 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
19281
19282 // No volatile, indexed or atomic loads/stores.
19283 if (!LSNode1->isSimple() || LSNode1->isIndexed())
19284 return SDValue();
19285
19286 // Function to get a base + constant representation from a memory value.
19287 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
19288 if (Ptr->getOpcode() == ISD::ADD)
19289 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
19290 return {Ptr->getOperand(0), C1->getZExtValue()};
19291 return {Ptr, 0};
19292 };
19293
19294 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
19295
19296 SDValue Chain = N->getOperand(0);
19297 for (SDUse &Use : Chain->uses()) {
19298 if (Use.getUser() != N && Use.getResNo() == 0 &&
19299 Use.getUser()->getOpcode() == N->getOpcode()) {
19301
19302 // No volatile, indexed or atomic loads/stores.
19303 if (!LSNode2->isSimple() || LSNode2->isIndexed())
19304 continue;
19305
19306 // Check if LSNode1 and LSNode2 have the same type and extension.
19307 if (LSNode1->getOpcode() == ISD::LOAD)
19308 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
19310 continue;
19311
19312 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
19313 continue;
19314
19315 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
19316
19317 // Check if the base pointer is the same for both instruction.
19318 if (Base1 != Base2)
19319 continue;
19320
19321 // Check if the offsets match the XTHeadMemPair encoding constraints.
19322 bool Valid = false;
19323 if (MemVT == MVT::i32) {
19324 // Check for adjacent i32 values and a 2-bit index.
19325 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
19326 Valid = true;
19327 } else if (MemVT == MVT::i64) {
19328 // Check for adjacent i64 values and a 2-bit index.
19329 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
19330 Valid = true;
19331 }
19332
19333 if (!Valid)
19334 continue;
19335
19336 // Try to combine.
19337 if (SDValue Res =
19338 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
19339 return Res;
19340 }
19341 }
19342
19343 return SDValue();
19344}
19345
19346// Fold
19347// (fp_to_int (froundeven X)) -> fcvt X, rne
19348// (fp_to_int (ftrunc X)) -> fcvt X, rtz
19349// (fp_to_int (ffloor X)) -> fcvt X, rdn
19350// (fp_to_int (fceil X)) -> fcvt X, rup
19351// (fp_to_int (fround X)) -> fcvt X, rmm
19352// (fp_to_int (frint X)) -> fcvt X
19355 const RISCVSubtarget &Subtarget) {
19356 SelectionDAG &DAG = DCI.DAG;
19357 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19358 MVT XLenVT = Subtarget.getXLenVT();
19359
19360 SDValue Src = N->getOperand(0);
19361
19362 // Don't do this for strict-fp Src.
19363 if (Src->isStrictFPOpcode())
19364 return SDValue();
19365
19366 // Ensure the FP type is legal.
19367 if (!TLI.isTypeLegal(Src.getValueType()))
19368 return SDValue();
19369
19370 // Don't do this for f16 with Zfhmin and not Zfh.
19371 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
19372 return SDValue();
19373
19374 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
19375 // If the result is invalid, we didn't find a foldable instruction.
19376 if (FRM == RISCVFPRndMode::Invalid)
19377 return SDValue();
19378
19379 SDLoc DL(N);
19380 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
19381 EVT VT = N->getValueType(0);
19382
19383 if (VT.isVector() && TLI.isTypeLegal(VT)) {
19384 MVT SrcVT = Src.getSimpleValueType();
19385 MVT SrcContainerVT = SrcVT;
19386 MVT ContainerVT = VT.getSimpleVT();
19387 SDValue XVal = Src.getOperand(0);
19388
19389 // For widening and narrowing conversions we just combine it into a
19390 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
19391 // end up getting lowered to their appropriate pseudo instructions based on
19392 // their operand types
19393 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
19394 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
19395 return SDValue();
19396
19397 // Make fixed-length vectors scalable first
19398 if (SrcVT.isFixedLengthVector()) {
19399 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
19400 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
19401 ContainerVT =
19402 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
19403 }
19404
19405 auto [Mask, VL] =
19406 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
19407
19408 SDValue FpToInt;
19409 if (FRM == RISCVFPRndMode::RTZ) {
19410 // Use the dedicated trunc static rounding mode if we're truncating so we
19411 // don't need to generate calls to fsrmi/fsrm
19412 unsigned Opc =
19413 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
19414 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
19415 } else {
19416 unsigned Opc =
19417 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
19418 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
19419 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
19420 }
19421
19422 // If converted from fixed-length to scalable, convert back
19423 if (VT.isFixedLengthVector())
19424 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
19425
19426 return FpToInt;
19427 }
19428
19429 // Only handle XLen or i32 types. Other types narrower than XLen will
19430 // eventually be legalized to XLenVT.
19431 if (VT != MVT::i32 && VT != XLenVT)
19432 return SDValue();
19433
19434 unsigned Opc;
19435 if (VT == XLenVT)
19436 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
19437 else
19438 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
19439
19440 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
19441 DAG.getTargetConstant(FRM, DL, XLenVT));
19442 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
19443}
19444
19445// Fold
19446// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
19447// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
19448// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
19449// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
19450// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
19451// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
19454 const RISCVSubtarget &Subtarget) {
19455 SelectionDAG &DAG = DCI.DAG;
19456 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19457 MVT XLenVT = Subtarget.getXLenVT();
19458
19459 // Only handle XLen types. Other types narrower than XLen will eventually be
19460 // legalized to XLenVT.
19461 EVT DstVT = N->getValueType(0);
19462 if (DstVT != XLenVT)
19463 return SDValue();
19464
19465 SDValue Src = N->getOperand(0);
19466
19467 // Don't do this for strict-fp Src.
19468 if (Src->isStrictFPOpcode())
19469 return SDValue();
19470
19471 // Ensure the FP type is also legal.
19472 if (!TLI.isTypeLegal(Src.getValueType()))
19473 return SDValue();
19474
19475 // Don't do this for f16 with Zfhmin and not Zfh.
19476 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
19477 return SDValue();
19478
19479 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19480
19481 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
19482 if (FRM == RISCVFPRndMode::Invalid)
19483 return SDValue();
19484
19485 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
19486
19487 unsigned Opc;
19488 if (SatVT == DstVT)
19489 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
19490 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
19491 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
19492 else
19493 return SDValue();
19494 // FIXME: Support other SatVTs by clamping before or after the conversion.
19495
19496 Src = Src.getOperand(0);
19497
19498 SDLoc DL(N);
19499 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
19500 DAG.getTargetConstant(FRM, DL, XLenVT));
19501
19502 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
19503 // extend.
19504 if (Opc == RISCVISD::FCVT_WU_RV64)
19505 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
19506
19507 // RISC-V FP-to-int conversions saturate to the destination register size, but
19508 // don't produce 0 for nan.
19509 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
19510 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
19511}
19512
19513// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
19514// smaller than XLenVT.
19516 const RISCVSubtarget &Subtarget) {
19517 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
19518
19519 SDValue Src = N->getOperand(0);
19520 if (Src.getOpcode() != ISD::BSWAP)
19521 return SDValue();
19522
19523 EVT VT = N->getValueType(0);
19524 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
19526 return SDValue();
19527
19528 SDLoc DL(N);
19529 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
19530}
19531
19533 const RISCVSubtarget &Subtarget) {
19534 // Fold:
19535 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
19536
19537 // Check if its first operand is a vp.load.
19538 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
19539 if (!VPLoad)
19540 return SDValue();
19541
19542 EVT LoadVT = VPLoad->getValueType(0);
19543 // We do not have a strided_load version for masks, and the evl of vp.reverse
19544 // and vp.load should always be the same.
19545 if (!LoadVT.getVectorElementType().isByteSized() ||
19546 N->getOperand(2) != VPLoad->getVectorLength() ||
19547 !N->getOperand(0).hasOneUse())
19548 return SDValue();
19549
19550 SDValue LoadMask = VPLoad->getMask();
19551 // If Mask is all ones, then load is unmasked and can be reversed.
19552 if (!isOneOrOneSplat(LoadMask)) {
19553 // If the mask is not all ones, we can reverse the load if the mask was also
19554 // reversed by a vp.reverse with the same EVL.
19555 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
19556 LoadMask.getOperand(2) != VPLoad->getVectorLength())
19557 return SDValue();
19558 LoadMask = LoadMask.getOperand(0);
19559 }
19560
19561 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
19562 SDLoc DL(N);
19563 MVT XLenVT = Subtarget.getXLenVT();
19564 SDValue NumElem = VPLoad->getVectorLength();
19565 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
19566
19567 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
19568 DAG.getConstant(1, DL, XLenVT));
19569 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
19570 DAG.getConstant(ElemWidthByte, DL, XLenVT));
19571 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
19572 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
19573
19575 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
19577 PtrInfo, VPLoad->getMemOperand()->getFlags(),
19578 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
19579
19580 SDValue Ret = DAG.getStridedLoadVP(
19581 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
19582 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
19583
19584 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
19585
19586 return Ret;
19587}
19588
19590 const RISCVSubtarget &Subtarget) {
19591 // Fold:
19592 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
19593 // -1, MASK)
19594 auto *VPStore = cast<VPStoreSDNode>(N);
19595
19596 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
19597 return SDValue();
19598
19599 SDValue VPReverse = VPStore->getValue();
19600 EVT ReverseVT = VPReverse->getValueType(0);
19601
19602 // We do not have a strided_store version for masks, and the evl of vp.reverse
19603 // and vp.store should always be the same.
19604 if (!ReverseVT.getVectorElementType().isByteSized() ||
19605 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
19606 !VPReverse.hasOneUse())
19607 return SDValue();
19608
19609 SDValue StoreMask = VPStore->getMask();
19610 // If Mask is all ones, then load is unmasked and can be reversed.
19611 if (!isOneOrOneSplat(StoreMask)) {
19612 // If the mask is not all ones, we can reverse the store if the mask was
19613 // also reversed by a vp.reverse with the same EVL.
19614 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
19615 StoreMask.getOperand(2) != VPStore->getVectorLength())
19616 return SDValue();
19617 StoreMask = StoreMask.getOperand(0);
19618 }
19619
19620 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
19621 SDLoc DL(N);
19622 MVT XLenVT = Subtarget.getXLenVT();
19623 SDValue NumElem = VPStore->getVectorLength();
19624 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
19625
19626 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
19627 DAG.getConstant(1, DL, XLenVT));
19628 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
19629 DAG.getConstant(ElemWidthByte, DL, XLenVT));
19630 SDValue Base =
19631 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
19632 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
19633
19635 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
19637 PtrInfo, VPStore->getMemOperand()->getFlags(),
19638 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
19639
19640 return DAG.getStridedStoreVP(
19641 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
19642 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
19643 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
19644 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
19645}
19646
19647// Peephole avgceil pattern.
19648// %1 = zext <N x i8> %a to <N x i32>
19649// %2 = zext <N x i8> %b to <N x i32>
19650// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
19651// %4 = add nuw nsw <N x i32> %3, %2
19652// %5 = lshr <N x i32> %4, splat (i32 1)
19653// %6 = trunc <N x i32> %5 to <N x i8>
19655 const RISCVSubtarget &Subtarget) {
19656 EVT VT = N->getValueType(0);
19657
19658 // Ignore fixed vectors.
19659 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19660 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
19661 return SDValue();
19662
19663 SDValue In = N->getOperand(0);
19664 SDValue Mask = N->getOperand(1);
19665 SDValue VL = N->getOperand(2);
19666
19667 // Input should be a vp_srl with same mask and VL.
19668 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
19669 In.getOperand(3) != VL)
19670 return SDValue();
19671
19672 // Shift amount should be 1.
19673 if (!isOneOrOneSplat(In.getOperand(1)))
19674 return SDValue();
19675
19676 // Shifted value should be a vp_add with same mask and VL.
19677 SDValue LHS = In.getOperand(0);
19678 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
19679 LHS.getOperand(3) != VL)
19680 return SDValue();
19681
19682 SDValue Operands[3];
19683
19684 // Matches another VP_ADD with same VL and Mask.
19685 auto FindAdd = [&](SDValue V, SDValue Other) {
19686 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
19687 V.getOperand(3) != VL)
19688 return false;
19689
19690 Operands[0] = Other;
19691 Operands[1] = V.getOperand(1);
19692 Operands[2] = V.getOperand(0);
19693 return true;
19694 };
19695
19696 // We need to find another VP_ADD in one of the operands.
19697 SDValue LHS0 = LHS.getOperand(0);
19698 SDValue LHS1 = LHS.getOperand(1);
19699 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
19700 return SDValue();
19701
19702 // Now we have three operands of two additions. Check that one of them is a
19703 // constant vector with ones.
19704 auto I = llvm::find_if(Operands,
19705 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
19706 if (I == std::end(Operands))
19707 return SDValue();
19708 // We found a vector with ones, move if it to the end of the Operands array.
19709 std::swap(*I, Operands[2]);
19710
19711 // Make sure the other 2 operands can be promoted from the result type.
19712 for (SDValue Op : drop_end(Operands)) {
19713 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
19714 Op.getOperand(2) != VL)
19715 return SDValue();
19716 // Input must be the same size or smaller than our result.
19717 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
19718 return SDValue();
19719 }
19720
19721 // Pattern is detected.
19722 // Rebuild the zero extends in case the inputs are smaller than our result.
19723 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
19724 Operands[0].getOperand(0), Mask, VL);
19725 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
19726 Operands[1].getOperand(0), Mask, VL);
19727 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
19728 // mode.
19729 SDLoc DL(N);
19730 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
19731 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
19732}
19733
19734// Convert from one FMA opcode to another based on whether we are negating the
19735// multiply result and/or the accumulator.
19736// NOTE: Only supports RVV operations with VL.
19737static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
19738 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
19739 if (NegMul) {
19740 // clang-format off
19741 switch (Opcode) {
19742 default: llvm_unreachable("Unexpected opcode");
19743 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19744 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19745 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19746 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19747 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19748 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19749 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19750 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19751 }
19752 // clang-format on
19753 }
19754
19755 // Negating the accumulator changes ADD<->SUB.
19756 if (NegAcc) {
19757 // clang-format off
19758 switch (Opcode) {
19759 default: llvm_unreachable("Unexpected opcode");
19760 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19761 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19762 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19763 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19764 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19765 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19766 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19767 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19768 }
19769 // clang-format on
19770 }
19771
19772 return Opcode;
19773}
19774
19776 // Fold FNEG_VL into FMA opcodes.
19777 // The first operand of strict-fp is chain.
19778 bool IsStrict =
19779 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
19780 unsigned Offset = IsStrict ? 1 : 0;
19781 SDValue A = N->getOperand(0 + Offset);
19782 SDValue B = N->getOperand(1 + Offset);
19783 SDValue C = N->getOperand(2 + Offset);
19784 SDValue Mask = N->getOperand(3 + Offset);
19785 SDValue VL = N->getOperand(4 + Offset);
19786
19787 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
19788 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
19789 V.getOperand(2) == VL) {
19790 // Return the negated input.
19791 V = V.getOperand(0);
19792 return true;
19793 }
19794
19795 return false;
19796 };
19797
19798 bool NegA = invertIfNegative(A);
19799 bool NegB = invertIfNegative(B);
19800 bool NegC = invertIfNegative(C);
19801
19802 // If no operands are negated, we're done.
19803 if (!NegA && !NegB && !NegC)
19804 return SDValue();
19805
19806 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
19807 if (IsStrict)
19808 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
19809 {N->getOperand(0), A, B, C, Mask, VL});
19810 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
19811 VL);
19812}
19813
19816 const RISCVSubtarget &Subtarget) {
19817 SelectionDAG &DAG = DCI.DAG;
19818
19820 return V;
19821
19822 // FIXME: Ignore strict opcodes for now.
19823 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
19824 return SDValue();
19825
19826 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
19827}
19828
19830 const RISCVSubtarget &Subtarget) {
19831 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
19832
19833 EVT VT = N->getValueType(0);
19834
19835 if (VT != Subtarget.getXLenVT())
19836 return SDValue();
19837
19838 if (!isa<ConstantSDNode>(N->getOperand(1)))
19839 return SDValue();
19840 uint64_t ShAmt = N->getConstantOperandVal(1);
19841
19842 SDValue N0 = N->getOperand(0);
19843
19844 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
19845 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
19846 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
19847 unsigned ExtSize =
19848 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
19849 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
19850 N0.getOperand(0).hasOneUse() &&
19852 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
19853 if (LShAmt < ExtSize) {
19854 unsigned Size = VT.getSizeInBits();
19855 SDLoc ShlDL(N0.getOperand(0));
19856 SDValue Shl =
19857 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
19858 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
19859 SDLoc DL(N);
19860 return DAG.getNode(ISD::SRA, DL, VT, Shl,
19861 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
19862 }
19863 }
19864 }
19865
19866 if (ShAmt > 32 || VT != MVT::i64)
19867 return SDValue();
19868
19869 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
19870 // FIXME: Should this be a generic combine? There's a similar combine on X86.
19871 //
19872 // Also try these folds where an add or sub is in the middle.
19873 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
19874 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
19875 SDValue Shl;
19876 ConstantSDNode *AddC = nullptr;
19877
19878 // We might have an ADD or SUB between the SRA and SHL.
19879 bool IsAdd = N0.getOpcode() == ISD::ADD;
19880 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
19881 // Other operand needs to be a constant we can modify.
19882 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
19883 if (!AddC)
19884 return SDValue();
19885
19886 // AddC needs to have at least 32 trailing zeros.
19887 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
19888 return SDValue();
19889
19890 // All users should be a shift by constant less than or equal to 32. This
19891 // ensures we'll do this optimization for each of them to produce an
19892 // add/sub+sext_inreg they can all share.
19893 for (SDNode *U : N0->users()) {
19894 if (U->getOpcode() != ISD::SRA ||
19895 !isa<ConstantSDNode>(U->getOperand(1)) ||
19896 U->getConstantOperandVal(1) > 32)
19897 return SDValue();
19898 }
19899
19900 Shl = N0.getOperand(IsAdd ? 0 : 1);
19901 } else {
19902 // Not an ADD or SUB.
19903 Shl = N0;
19904 }
19905
19906 // Look for a shift left by 32.
19907 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
19908 Shl.getConstantOperandVal(1) != 32)
19909 return SDValue();
19910
19911 // We if we didn't look through an add/sub, then the shl should have one use.
19912 // If we did look through an add/sub, the sext_inreg we create is free so
19913 // we're only creating 2 new instructions. It's enough to only remove the
19914 // original sra+add/sub.
19915 if (!AddC && !Shl.hasOneUse())
19916 return SDValue();
19917
19918 SDLoc DL(N);
19919 SDValue In = Shl.getOperand(0);
19920
19921 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
19922 // constant.
19923 if (AddC) {
19924 SDValue ShiftedAddC =
19925 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
19926 if (IsAdd)
19927 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
19928 else
19929 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
19930 }
19931
19932 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
19933 DAG.getValueType(MVT::i32));
19934 if (ShAmt == 32)
19935 return SExt;
19936
19937 return DAG.getNode(
19938 ISD::SHL, DL, MVT::i64, SExt,
19939 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
19940}
19941
19942// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
19943// the result is used as the condition of a br_cc or select_cc we can invert,
19944// inverting the setcc is free, and Z is 0/1. Caller will invert the
19945// br_cc/select_cc.
19947 bool IsAnd = Cond.getOpcode() == ISD::AND;
19948 if (!IsAnd && Cond.getOpcode() != ISD::OR)
19949 return SDValue();
19950
19951 if (!Cond.hasOneUse())
19952 return SDValue();
19953
19954 SDValue Setcc = Cond.getOperand(0);
19955 SDValue Xor = Cond.getOperand(1);
19956 // Canonicalize setcc to LHS.
19957 if (Setcc.getOpcode() != ISD::SETCC)
19958 std::swap(Setcc, Xor);
19959 // LHS should be a setcc and RHS should be an xor.
19960 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
19961 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
19962 return SDValue();
19963
19964 // If the condition is an And, SimplifyDemandedBits may have changed
19965 // (xor Z, 1) to (not Z).
19966 SDValue Xor1 = Xor.getOperand(1);
19967 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
19968 return SDValue();
19969
19970 EVT VT = Cond.getValueType();
19971 SDValue Xor0 = Xor.getOperand(0);
19972
19973 // The LHS of the xor needs to be 0/1.
19975 if (!DAG.MaskedValueIsZero(Xor0, Mask))
19976 return SDValue();
19977
19978 // We can only invert integer setccs.
19979 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
19980 if (!SetCCOpVT.isScalarInteger())
19981 return SDValue();
19982
19983 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
19984 if (ISD::isIntEqualitySetCC(CCVal)) {
19985 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
19986 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
19987 Setcc.getOperand(1), CCVal);
19988 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
19989 // Invert (setlt 0, X) by converting to (setlt X, 1).
19990 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
19991 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
19992 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
19993 // (setlt X, 1) by converting to (setlt 0, X).
19994 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
19995 DAG.getConstant(0, SDLoc(Setcc), VT),
19996 Setcc.getOperand(0), CCVal);
19997 } else
19998 return SDValue();
19999
20000 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
20001 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
20002}
20003
// Perform common combines for BR_CC and SELECT_CC conditions.
// LHS/RHS/CC are the comparison operands and condition code of the user node;
// they are rewritten in place. Returns true if any rewrite was applied, in
// which case the caller rebuilds the br_cc/select_cc from the updated values.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // Everything below only applies to eq/ne compares.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // If XOR is reused and has an immediate that will fit in XORI,
  // do not fold.
  auto isXorImmediate = [](const SDValue &Op) -> bool {
    if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
      return isInt<12>(XorCnst->getSExtValue());
    return false;
  };
  // Fold (X(i1) ^ 1) == 0 -> X != 0
  auto singleBitOp = [&DAG](const SDValue &VarOp,
                            const SDValue &ConstOp) -> bool {
    if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
      // VarOp must be provably 0/1 (all bits above bit 0 known zero).
      const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
      return (XorCnst->getSExtValue() == 1) &&
             DAG.MaskedValueIsZero(VarOp, Mask);
    }
    return false;
  };
  // The xor result feeding nothing but select_cc/br_cc users can always be
  // folded away, even if the immediate would fit XORI.
  auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
    for (const SDNode *UserNode : Op->users()) {
      const unsigned Opcode = UserNode->getOpcode();
      if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
        return false;
    }
    return true;
  };
  auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
                             const SDValue &LHS, const SDValue &RHS) -> bool {
    return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
           (!isXorImmediate(LHS.getOperand(1)) ||
            singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
            onlyUsedBySelectOrBR(LHS));
  };
  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (isFoldableXorEq(LHS, RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }
  // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext(X), C, eq/ne)
  if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    const SDValue LHS0 = LHS.getOperand(0);
    if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
      // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
      RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
                        LHS0.getOperand(1), LHS.getOperand(1));
      LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
                        LHS0.getOperand(0), LHS.getOperand(1));
      return true;
    }
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        // XAndesPerf supports branch on test bit.
        if (Subtarget.hasVendorXAndesPerf()) {
          LHS =
              DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(Mask, DL, LHS.getValueType()));
          return true;
        }

        // Move the tested bit into the sign bit and branch on sign instead.
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  // Try to push the inversion through an and/or of a setcc and a 0/1 value
  // (De Morgan); the inverted condition code compensates.
  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
20144
20145// Fold
20146// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
20147// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
20148// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
20149// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
20150// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
20151// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
20153 SDValue TrueVal, SDValue FalseVal,
20154 bool Swapped) {
20155 bool Commutative = true;
20156 unsigned Opc = TrueVal.getOpcode();
20157 switch (Opc) {
20158 default:
20159 return SDValue();
20160 case ISD::SHL:
20161 case ISD::SRA:
20162 case ISD::SRL:
20163 case ISD::SUB:
20164 case ISD::ROTL:
20165 case ISD::ROTR:
20166 Commutative = false;
20167 break;
20168 case ISD::ADD:
20169 case ISD::OR:
20170 case ISD::XOR:
20171 case ISD::UMIN:
20172 case ISD::UMAX:
20173 break;
20174 }
20175
20176 if (!TrueVal.hasOneUse())
20177 return SDValue();
20178
20179 unsigned OpToFold;
20180 if (FalseVal == TrueVal.getOperand(0))
20181 OpToFold = 0;
20182 else if (Commutative && FalseVal == TrueVal.getOperand(1))
20183 OpToFold = 1;
20184 else
20185 return SDValue();
20186
20187 EVT VT = N->getValueType(0);
20188 SDLoc DL(N);
20189 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
20190 EVT OtherOpVT = OtherOp.getValueType();
20191 SDValue IdentityOperand =
20192 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
20193 if (!Commutative)
20194 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
20195 assert(IdentityOperand && "No identity operand!");
20196
20197 if (Swapped)
20198 std::swap(OtherOp, IdentityOperand);
20199 SDValue NewSel =
20200 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
20201 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
20202}
20203
20204// This tries to get rid of `select` and `icmp` that are being used to handle
20205// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
20207 SDValue Cond = N->getOperand(0);
20208
20209 // This represents either CTTZ or CTLZ instruction.
20210 SDValue CountZeroes;
20211
20212 SDValue ValOnZero;
20213
20214 if (Cond.getOpcode() != ISD::SETCC)
20215 return SDValue();
20216
20217 if (!isNullConstant(Cond->getOperand(1)))
20218 return SDValue();
20219
20220 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
20221 if (CCVal == ISD::CondCode::SETEQ) {
20222 CountZeroes = N->getOperand(2);
20223 ValOnZero = N->getOperand(1);
20224 } else if (CCVal == ISD::CondCode::SETNE) {
20225 CountZeroes = N->getOperand(1);
20226 ValOnZero = N->getOperand(2);
20227 } else {
20228 return SDValue();
20229 }
20230
20231 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
20232 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
20233 CountZeroes = CountZeroes.getOperand(0);
20234
20235 if (CountZeroes.getOpcode() != ISD::CTTZ &&
20236 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
20237 CountZeroes.getOpcode() != ISD::CTLZ &&
20238 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
20239 return SDValue();
20240
20241 if (!isNullConstant(ValOnZero))
20242 return SDValue();
20243
20244 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
20245 if (Cond->getOperand(0) != CountZeroesArgument)
20246 return SDValue();
20247
20248 unsigned BitWidth = CountZeroes.getValueSizeInBits();
20249 if (!isPowerOf2_32(BitWidth))
20250 return SDValue();
20251
20252 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
20253 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
20254 CountZeroes.getValueType(), CountZeroesArgument);
20255 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
20256 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
20257 CountZeroes.getValueType(), CountZeroesArgument);
20258 }
20259
20260 SDValue BitWidthMinusOne =
20261 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
20262
20263 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
20264 CountZeroes, BitWidthMinusOne);
20265 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
20266}
20267
20269 const RISCVSubtarget &Subtarget) {
20270 SDValue Cond = N->getOperand(0);
20271 SDValue True = N->getOperand(1);
20272 SDValue False = N->getOperand(2);
20273 SDLoc DL(N);
20274 EVT VT = N->getValueType(0);
20275 EVT CondVT = Cond.getValueType();
20276
20277 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
20278 return SDValue();
20279
20280 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
20281 // BEXTI, where C is power of 2.
20282 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
20283 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
20284 SDValue LHS = Cond.getOperand(0);
20285 SDValue RHS = Cond.getOperand(1);
20286 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20287 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
20288 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
20289 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
20290 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
20291 return DAG.getSelect(DL, VT,
20292 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
20293 False, True);
20294 }
20295 }
20296 return SDValue();
20297}
20298
20299static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
20300 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
20301 return false;
20302
20303 SwapCC = false;
20304 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
20305 std::swap(TrueVal, FalseVal);
20306 SwapCC = true;
20307 }
20308
20309 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
20310 return false;
20311
20312 SDValue A = FalseVal.getOperand(0);
20313 SDValue B = FalseVal.getOperand(1);
20314 // Add is commutative, so check both orders
20315 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
20316 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
20317}
20318
20319/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
20320/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
20321/// register pressure over the add followed by masked vsub sequence.
20323 SDLoc DL(N);
20324 EVT VT = N->getValueType(0);
20325 SDValue CC = N->getOperand(0);
20326 SDValue TrueVal = N->getOperand(1);
20327 SDValue FalseVal = N->getOperand(2);
20328
20329 bool SwapCC;
20330 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
20331 return SDValue();
20332
20333 SDValue Sub = SwapCC ? TrueVal : FalseVal;
20334 SDValue A = Sub.getOperand(0);
20335 SDValue B = Sub.getOperand(1);
20336
20337 // Arrange the select such that we can match a masked
20338 // vrsub.vi to perform the conditional negate
20339 SDValue NegB = DAG.getNegative(B, DL, VT);
20340 if (!SwapCC)
20341 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
20342 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
20343 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
20344}
20345
20347 const RISCVSubtarget &Subtarget) {
20348 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
20349 return Folded;
20350
20351 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
20352 return V;
20353
20354 if (Subtarget.hasConditionalMoveFusion())
20355 return SDValue();
20356
20357 SDValue TrueVal = N->getOperand(1);
20358 SDValue FalseVal = N->getOperand(2);
20359 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
20360 return V;
20361 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
20362}
20363
20364/// If we have a build_vector where each lane is binop X, C, where C
20365/// is a constant (but not necessarily the same constant on all lanes),
20366/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
20367/// We assume that materializing a constant build vector will be no more
20368/// expensive that performing O(n) binops.
20370 const RISCVSubtarget &Subtarget,
20371 const RISCVTargetLowering &TLI) {
20372 SDLoc DL(N);
20373 EVT VT = N->getValueType(0);
20374
20375 assert(!VT.isScalableVector() && "unexpected build vector");
20376
20377 if (VT.getVectorNumElements() == 1)
20378 return SDValue();
20379
20380 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
20381 if (!TLI.isBinOp(Opcode))
20382 return SDValue();
20383
20384 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
20385 return SDValue();
20386
20387 // This BUILD_VECTOR involves an implicit truncation, and sinking
20388 // truncates through binops is non-trivial.
20389 if (N->op_begin()->getValueType() != VT.getVectorElementType())
20390 return SDValue();
20391
20392 SmallVector<SDValue> LHSOps;
20393 SmallVector<SDValue> RHSOps;
20394 for (SDValue Op : N->ops()) {
20395 if (Op.isUndef()) {
20396 // We can't form a divide or remainder from undef.
20397 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
20398 return SDValue();
20399
20400 LHSOps.push_back(Op);
20401 RHSOps.push_back(Op);
20402 continue;
20403 }
20404
20405 // TODO: We can handle operations which have an neutral rhs value
20406 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
20407 // of profit in a more explicit manner.
20408 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
20409 return SDValue();
20410
20411 LHSOps.push_back(Op.getOperand(0));
20412 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
20413 !isa<ConstantFPSDNode>(Op.getOperand(1)))
20414 return SDValue();
20415 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
20416 // have different LHS and RHS types.
20417 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
20418 return SDValue();
20419
20420 RHSOps.push_back(Op.getOperand(1));
20421 }
20422
20423 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
20424 DAG.getBuildVector(VT, DL, RHSOps));
20425}
20426
20428 ElementCount OpEC = OpVT.getVectorElementCount();
20429 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
20430 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
20431}
20432
/// Given fixed length vectors A and B with equal element types, but possibly
/// different number of elements, return A + B where either A or B is zero
/// padded to the larger number of elements.
// NOTE(review): the declaration line (expected roughly:
//   static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
// per the call sites) was dropped by the source extraction -- restore it
// from upstream before compiling.
                             SelectionDAG &DAG) {
  // NOTE: Manually doing the extract/add/insert scheme produces
  // significantly better codegen than the naive pad with zeros
  // and add scheme.
  EVT AVT = A.getValueType();
  EVT BVT = B.getValueType();
  // NOTE(review): the guard that opens this braced region (an `if` that
  // swaps so A becomes the narrower vector — B must be at least as wide for
  // the extract below) was dropped by the source extraction -- verify.
    std::swap(A, B);
    std::swap(AVT, BVT);
  }

  // Add the narrow A into the low lanes of B, then write the sum back into
  // those low lanes; the high lanes of B pass through unchanged.
  SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
  SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
  return DAG.getInsertSubvector(DL, B, Res, 0);
}
20453
// Try to rewrite the operand of a vecreduce.add as a partial-reduce (4-way
// dot product) node, recursing through adds and zero/sign extends.
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue foldReduceOperandViaVDOTA4(SDValue InVec, const SDLoc &DL,
// per the recursive calls below) was dropped by the source extraction --
// restore it from upstream before compiling.
                                           SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget,
                                           const RISCVTargetLowering &TLI) {
  using namespace SDPatternMatch;
  // Note: We intentionally do not check the legality of the reduction type.
  // We want to handle the m4/m8 *src* types, and thus need to let illegal
  // intermediate types flow through here.
  if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
  // NOTE(review): the second half of this condition was dropped by the
  // source extraction -- verify against upstream.
    return SDValue();

  // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
  // form).
  SDValue A, B;
  if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
    SDValue AOpt = foldReduceOperandViaVDOTA4(A, DL, DAG, Subtarget, TLI);
    SDValue BOpt = foldReduceOperandViaVDOTA4(B, DL, DAG, Subtarget, TLI);
    if (AOpt || BOpt) {
      if (AOpt)
        A = AOpt;
      if (BOpt)
        B = BOpt;
      // From here, we're doing A + B with mixed types, implicitly zero
      // padded to the wider type. Note that we *don't* need the result
      // type to be the original VT, and in fact prefer narrower ones
      // if possible.
      return getZeroPaddedAdd(DL, A, B, DAG);
    }
  }

  // zext a <--> partial_reduce_umla 0, a, 1
  // sext a <--> partial_reduce_smla 0, a, 1
  if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
      InVec.getOpcode() == ISD::SIGN_EXTEND) {
    SDValue A = InVec.getOperand(0);
    EVT OpVT = A.getValueType();
    if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
      return SDValue();

    // Multiply by an all-ones splat: the dot product then just sums lanes.
    MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
    SDValue B = DAG.getConstant(0x1, DL, OpVT);
    bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
    unsigned Opc =
    // NOTE(review): the IsSigned-based opcode selection (signed vs. unsigned
    // partial-reduce, per the comments above) was dropped by the source
    // extraction -- restore it from upstream.
    return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
  }

  // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
  // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
  // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
  // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
  if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
    return SDValue();

  if (!ISD::isExtOpcode(A.getOpcode()))
    return SDValue();

  EVT OpVT = A.getOperand(0).getValueType();
  if (OpVT.getVectorElementType() != MVT::i8 ||
      OpVT != B.getOperand(0).getValueType() ||
      !TLI.isTypeLegal(A.getValueType()))
    return SDValue();

  // NOTE(review): each branch below originally assigned Opc to one of the
  // partial-reduce opcodes documented in the comment block above; those
  // assignment lines were dropped by the source extraction -- restore them
  // from upstream before compiling.
  unsigned Opc;
  if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
  else if (A.getOpcode() == ISD::ZERO_EXTEND &&
           B.getOpcode() == ISD::ZERO_EXTEND)
  else if (A.getOpcode() == ISD::SIGN_EXTEND &&
           B.getOpcode() == ISD::ZERO_EXTEND)
  else if (A.getOpcode() == ISD::ZERO_EXTEND &&
           B.getOpcode() == ISD::SIGN_EXTEND) {
    std::swap(A, B);
  } else
    return SDValue();

  MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
  return DAG.getNode(
      Opc, DL, ResVT,
      {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
}
20539
// Combine a VECREDUCE_ADD by rewriting its operand as a 4-way dot product
// when the Zvdot4a8i extension is available.
// NOTE(review): the first declaration line (a static combine taking
// SDNode *N, SelectionDAG &DAG, ...) was dropped by the source extraction --
// restore the exact name/signature from upstream before compiling.
                                         const RISCVSubtarget &Subtarget,
                                         const RISCVTargetLowering &TLI) {
  if (!Subtarget.hasStdExtZvdot4a8i())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue InVec = N->getOperand(0);
  // If the reduced operand can be expressed via partial reduces, reduce the
  // (narrower) partial-reduce result instead.
  if (SDValue V = foldReduceOperandViaVDOTA4(InVec, DL, DAG, Subtarget, TLI))
    return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
  return SDValue();
}
20553
// Combine INSERT_VECTOR_ELT: push the insert into the arms of a matching
// binop, or into the relevant source operand of a concat_vectors.
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// ) was dropped by the source extraction -- restore it from upstream.
                                           const RISCVSubtarget &Subtarget,
                                           const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    // NOTE(review): the constant check on InVecRHS that guards this bail-out
    // was dropped by the source extraction -- verify against upstream.
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    // NOTE(review): the two lines starting the INSERT_VECTOR_ELT nodes that
    // define LHS and RHS were dropped by the source extraction; only their
    // continuation lines remain below -- restore from upstream.
        InVecLHS, InValLHS, EltNo);
        InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  unsigned NewIdx = Elt % ConcatNumElts;

  // Insert into the sub-operand that owns lane Elt, then rebuild the concat.
  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps(InVec->ops());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
20619
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
// NOTE(review): the first declaration line (expected roughly:
//   static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
// ) was dropped by the source extraction -- restore it from upstream.
                                   const RISCVSubtarget &Subtarget,
                                   const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors
  if (VT.isScalableVector())
    return SDValue();

  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided
  // NOTE(review): the declaration of `Lds` (a small vector of LoadSDNode*)
  // was dropped by the source extraction here -- restore from upstream.
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads
    Align = std::min(Align, Ld->getAlign());
  }

  // A PtrDiff holds either a constant byte distance or a stride SDValue, plus
  // a flag saying the stride must be negated.
  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride =
      std::holds_alternative<SDValue>(StrideVariant)
          ? std::get<SDValue>(StrideVariant)
          : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
                                  Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load
  // NOTE(review): the assignment of MemSize (an "unknown size" sentinel) and
  // the first line of the MachineMemOperand creation were dropped by the
  // source extraction around here -- restore from upstream.

      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getStridedLoadVP(
      WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
      AllOneMask,
      DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);

  // Preserve chain ordering for every original load we replaced.
  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
20749
// Combine VECTOR_SHUFFLE: recognize select-like shuffles over add/sub pairs
// (conditional negate), compress shuffles of shuffles, and narrow wide
// element types the target cannot shuffle directly.
// NOTE(review): the first declaration line (a static combine taking
// SDNode *N, SelectionDAG &DAG, ...) was dropped by the source extraction --
// restore it from upstream before compiling.
                                     const RISCVSubtarget &Subtarget,
                                     const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  const unsigned ElementSize = VT.getScalarSizeInBits();
  const unsigned NumElts = VT.getVectorNumElements();
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  // NOTE(review): the line casting N to a ShuffleVectorSDNode (defining SVN)
  // was dropped by the source extraction here -- restore from upstream.
  ArrayRef<int> Mask = SVN->getMask();
  MVT XLenVT = Subtarget.getXLenVT();

  // Recognized a disguised select of add/sub.
  bool SwapCC;
  if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
      matchSelectAddSub(V1, V2, SwapCC)) {
    SDValue Sub = SwapCC ? V1 : V2;
    SDValue A = Sub.getOperand(0);
    SDValue B = Sub.getOperand(1);

    // Rebuild the select mask as an i1 build_vector condition.
    SmallVector<SDValue> MaskVals;
    for (int MaskIndex : Mask) {
      bool SelectMaskVal = (MaskIndex < (int)NumElts);
      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    }
    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
    SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);

    // Arrange the select such that we can match a masked
    // vrsub.vi to perform the conditional negate
    SDValue NegB = DAG.getNegative(B, DL, VT);
    if (!SwapCC)
      CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
    SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
    return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
  }

  if (SDValue V = compressShuffleOfShuffles(SVN, Subtarget, DAG))
    return V;

  // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
  // during the combine phase before type legalization, and relies on
  // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
  // for the source mask.
  if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
      !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
      VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
    return SDValue();

  // Split each wide lane in two and shuffle at the narrower granularity.
  SmallVector<int, 8> NewMask;
  narrowShuffleMaskElts(2, Mask, NewMask);

  LLVMContext &C = *DAG.getContext();
  EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
  EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
  SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
                                     DAG.getBitcast(NewVT, V2), NewMask);
  return DAG.getBitcast(VT, Res);
}
20811
                               const RISCVSubtarget &Subtarget) {
  // Fold an add of a widening multiply into a widening multiply-accumulate:
  //   (add X, (vwmul[u|su]_vl Y, Z)) -> (vwmacc[u|su] Y, Z, X)
  // The accumulate opcode is obtained from getMAccOpcode below.
  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  // Only scalable vectors are handled here.
  if (N->getValueType(0).isFixedLengthVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);

  // For ADD_VL, the passthru operand must be undef since the macc node
  // produced below has no passthru to carry it.
  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddPassthruOp = N->getOperand(2);
    if (!AddPassthruOp.isUndef())
      return SDValue();
  }

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    case RISCVISD::VWMULU_VL:
    case RISCVISD::VWMULSU_VL:
      return true;
    default:
      return false;
    }
  };

  // The widening multiply may be on either side of the add; canonicalize it
  // into MulOp.
  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  // The multiply's own passthru must also be undef.
  SDValue MulPassthruOp = MulOp.getOperand(2);

  if (!MulPassthruOp.isUndef())
    return SDValue();

  // Determine the mask/VL the add executes under: defaults (all-ones mask,
  // VLMAX) for a plain ISD::ADD, explicit operands 3/4 for ADD_VL.
  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  // The add and the multiply must agree on mask and VL for the fold to be
  // sound.
  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();

  const auto &TSInfo =
      static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
  unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}
20876
                                  const RISCVSubtarget &Subtarget) {

  // Fold an add into the accumulator operand of a 4-way dot-product node:
  //   (add X, (vdota4* A, B, Acc, Mask, VL))
  //     -> (vdota4* A, B, (add_vl X, Acc, undef, Mask, VL), Mask, VL)
  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  if (!N->getValueType(0).isVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue DotOp = N->getOperand(1);

  // For ADD_VL, the passthru must be undef since it is discarded by the
  // fold.
  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddPassthruOp = N->getOperand(2);
    if (!AddPassthruOp.isUndef())
      return SDValue();
  }

  auto IsVdota4Opc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VDOTA4_VL:
    case RISCVISD::VDOTA4U_VL:
    case RISCVISD::VDOTA4SU_VL:
      return true;
    default:
      return false;
    }
  };

  // The dot-product may appear on either side of the add; canonicalize it
  // into DotOp.
  if (!IsVdota4Opc(DotOp.getOpcode()))
    std::swap(Addend, DotOp);

  if (!IsVdota4Opc(DotOp.getOpcode()))
    return SDValue();

  // Determine the mask/VL the add executes under: defaults for a plain
  // ISD::ADD, explicit operands 3/4 for ADD_VL.
  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulVL = DotOp.getOperand(4);
  if (AddVL != MulVL)
    return SDValue();

  // The add must be unmasked (an all-ones VMSET_VL over the same VL) so
  // that pre-adding into the accumulator cannot change masked-off lanes.
  if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
      AddMask.getOperand(0) != MulVL)
    return SDValue();

  // Pre-add the addend into the dot-product's accumulator operand.
  SDValue AccumOp = DotOp.getOperand(2);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
                       DAG.getUNDEF(VT), AddMask, AddVL);

  // Rebuild the dot-product with the combined accumulator.
  SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
                   DotOp.getOperand(3), DotOp->getOperand(4)};
  return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
}
20939
/// Canonicalize the index operand of a gather/scatter to the unsigned
/// addressing form RISC-V supports, sign-extending narrow signed indices to
/// XLenVT first. Returns true if Index/IndexType were updated.
static bool
                               ISD::MemIndexType &IndexType,
  // Only run before legalization so the possibly-illegal wide index type
  // created below can still be split by the generic legalizer.
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        EVT::getVectorVT(*DAG.getContext(), XLenVT,
                                         IndexVT.getVectorElementCount()),
                        Index);
  }
  // Once the index is XLenVT wide, signed and unsigned interpretations
  // compute the same address modulo 2^XLEN, so mark it unsigned.
  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}
20971
20972/// Match the index vector of a scatter or gather node as the shuffle mask
20973/// which performs the rearrangement if possible. Will only match if
20974/// all lanes are touched, and thus replacing the scatter or gather with
20975/// a unit strided access and shuffle is legal.
20976static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
20977 SmallVector<int> &ShuffleMask) {
20978 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20979 return false;
20980 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20981 return false;
20982
20983 const unsigned ElementSize = VT.getScalarStoreSize();
20984 const unsigned NumElems = VT.getVectorNumElements();
20985
20986 // Create the shuffle mask and check all bits active
20987 assert(ShuffleMask.empty());
20988 BitVector ActiveLanes(NumElems);
20989 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20990 // TODO: We've found an active bit of UB, and could be
20991 // more aggressive here if desired.
20992 if (Index->getOperand(i)->isUndef())
20993 return false;
20994 uint64_t C = Index->getConstantOperandVal(i);
20995 if (C % ElementSize != 0)
20996 return false;
20997 C = C / ElementSize;
20998 if (C >= NumElems)
20999 return false;
21000 ShuffleMask.push_back(C);
21001 ActiveLanes.set(C);
21002 }
21003 return ActiveLanes.all();
21004}
21005
21006/// Match the index of a gather or scatter operation as an operation
21007/// with twice the element width and half the number of elements. This is
21008/// generally profitable (if legal) because these operations are linear
21009/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
21010/// come out ahead.
21011static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
21012 Align BaseAlign, const RISCVSubtarget &ST) {
21013 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
21014 return false;
21015 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
21016 return false;
21017
21018 // Attempt a doubling. If we can use a element type 4x or 8x in
21019 // size, this will happen via multiply iterations of the transform.
21020 const unsigned NumElems = VT.getVectorNumElements();
21021 if (NumElems % 2 != 0)
21022 return false;
21023
21024 const unsigned ElementSize = VT.getScalarStoreSize();
21025 const unsigned WiderElementSize = ElementSize * 2;
21026 if (WiderElementSize > ST.getELen()/8)
21027 return false;
21028
21029 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
21030 return false;
21031
21032 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
21033 // TODO: We've found an active bit of UB, and could be
21034 // more aggressive here if desired.
21035 if (Index->getOperand(i)->isUndef())
21036 return false;
21037 // TODO: This offset check is too strict if we support fully
21038 // misaligned memory operations.
21039 uint64_t C = Index->getConstantOperandVal(i);
21040 if (i % 2 == 0) {
21041 if (C % WiderElementSize != 0)
21042 return false;
21043 continue;
21044 }
21045 uint64_t Last = Index->getConstantOperandVal(i-1);
21046 if (C != Last + ElementSize)
21047 return false;
21048 }
21049 return true;
21050}
21051
// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This would be benefit for the cases where X and Y are both the same value
// type of low precision vectors. Since the truncate would be lowered into
// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
// restriction, such pattern would be expanded into a series of "vsetvli"
// and "vnsrl" instructions later to reach this point.
  SDValue Mask = N->getOperand(1);
  SDValue VL = N->getOperand(2);

  // Only fire for an unmasked, whole-register truncate: VL is the all-ones
  // sentinel or the X0 register (both meaning VLMAX), and the mask is a
  // VMSET_VL over the same VL.
  bool IsVLMAX = isAllOnesConstant(VL) ||
                 (isa<RegisterSDNode>(VL) &&
                  cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
  if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
      Mask.getOperand(0) != VL)
    return SDValue();

  // A truncate layer belongs to the chain only if it uses the same mask/VL.
  auto IsTruncNode = [&](SDValue V) {
    return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
           V.getOperand(1) == Mask && V.getOperand(2) == VL;
  };

  SDValue Op = N->getOperand(0);

  // We need to first find the inner level of TRUNCATE_VECTOR_VL node
  // to distinguish such pattern.
  while (IsTruncNode(Op)) {
    if (!Op.hasOneUse())
      return SDValue();
    Op = Op.getOperand(0);
  }

  if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
    return SDValue();

  // The shifted value must be a single-use sign-extend, and the shift
  // amount a single-use zero-extend.
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
      N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
    return SDValue();

  // X and Y must share the narrow vector type that the truncate produces.
  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);
  if (!N00.getValueType().isVector() ||
      N00.getValueType() != N10.getValueType() ||
      N->getValueType(0) != N10.getValueType())
    return SDValue();

  // Clamp the shift amount to scalarsize-1: shifting the sign-extended
  // value by anything larger still yields all sign bits, which the clamped
  // narrow shift reproduces.
  unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
  SDValue SMin =
      DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
                  DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
  return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
}
21106
// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
// maximum value for the truncated type.
// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
// is the signed maximum value for the truncated type and C2 is the signed
// minimum value.
                                    const RISCVSubtarget &Subtarget) {
  assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);

  MVT VT = N->getSimpleValueType(0);

  SDValue Mask = N->getOperand(1);
  SDValue VL = N->getOperand(2);

  // Match a min/max (either the generic opcode Opc or the VL form OpcVL with
  // matching mask/VL and undef passthru) whose second operand is a constant
  // splat. On success, returns the non-constant operand and sets SplatVal;
  // otherwise returns an empty SDValue.
  auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
                                  APInt &SplatVal) {
    if (V.getOpcode() != Opc &&
        !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
          V.getOperand(3) == Mask && V.getOperand(4) == VL))
      return SDValue();

    SDValue Op = V.getOperand(1);

    // Peek through conversion between fixed and scalable vectors.
    if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
        isNullConstant(Op.getOperand(2)) &&
        Op.getOperand(1).getValueType().isFixedLengthVector() &&
        Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
        isNullConstant(Op.getOperand(1).getOperand(1)))
      Op = Op.getOperand(1).getOperand(0);

    if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
      return V.getOperand(0);

    // A VMV_V_X_VL of a scalar constant is also a splat; adjust the scalar
    // to the vector's element width.
    if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
        Op.getOperand(2) == VL) {
      if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        SplatVal =
            Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
        return V.getOperand(0);
      }
    }

    return SDValue();
  };

  SDLoc DL(N);

  // Detect clamps to the unsigned range of VT, which can use vnclipu
  // (TRUNCATE_VECTOR_VL_USAT).
  auto DetectUSatPattern = [&](SDValue V) {
    APInt LoC, HiC;

    // Simple case, V is a UMIN.
    if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
      if (HiC.isMask(VT.getScalarSizeInBits()))
        return UMinOp;

    // If we have an SMAX that removes negative numbers first, then we can match
    // SMIN instead of UMIN.
    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
      if (SDValue SMaxOp =
              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
          return SMinOp;

    // If we have an SMIN before an SMAX and the SMAX constant is less than or
    // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
    // first.
    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
      if (SDValue SMinOp =
              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
            HiC.uge(LoC))
          return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
                             V.getOperand(1), DAG.getUNDEF(V.getValueType()),
                             Mask, VL);

    return SDValue();
  };

  // Detect clamps to the signed range of VT, which can use vnclip
  // (TRUNCATE_VECTOR_VL_SSAT). The smin/smax pair may appear in either
  // order.
  auto DetectSSatPattern = [&](SDValue V) {
    unsigned NumDstBits = VT.getScalarSizeInBits();
    unsigned NumSrcBits = V.getScalarValueSizeInBits();
    APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
    APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);

    APInt HiC, LoC;
    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
      if (SDValue SMaxOp =
              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
        if (HiC == SignedMax && LoC == SignedMin)
          return SMaxOp;

    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
      if (SDValue SMinOp =
              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
        if (HiC == SignedMax && LoC == SignedMin)
          return SMinOp;

    return SDValue();
  };

  SDValue Src = N->getOperand(0);

  // Look through multiple layers of truncates.
  while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
         Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
         Src.hasOneUse())
    Src = Src.getOperand(0);

  SDValue Val;
  unsigned ClipOpc;
  if ((Val = DetectUSatPattern(Src)))
    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
  else if ((Val = DetectSSatPattern(Src)))
    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
  else
    return SDValue();

  MVT ValVT = Val.getSimpleValueType();

  // The saturating truncate only narrows SEW*2 -> SEW, so emit one per
  // halving until the destination type is reached.
  do {
    MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
    ValVT = ValVT.changeVectorElementType(ValEltVT);
    Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
  } while (ValVT != VT);

  return Val;
}
21236
// Convert
// (iX ctpop (bitcast (vXi1 A)))
// ->
// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
// and
// (iN reduce.add (zext (vXi1 A to vXiN))
// ->
// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
// FIXME: It's complicated to match all the variations of this after type
// legalization so we only handle the pre-type legalization pattern, but that
// requires the fixed vector type to be legal.
                              const RISCVSubtarget &Subtarget) {
  unsigned Opc = N->getOpcode();
         "Unexpected opcode");
  // Only scalar integer results are handled by this combine.
  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger())
    return SDValue();

  SDValue Src = N->getOperand(0);

  if (Opc == ISD::CTPOP) {
    // Peek through zero_extend. It doesn't change the count.
    if (Src.getOpcode() == ISD::ZERO_EXTEND)
      Src = Src.getOperand(0);

    // The popcount source must be a scalar bitcast of an i1 vector.
    if (Src.getOpcode() != ISD::BITCAST)
      return SDValue();
    Src = Src.getOperand(0);
  } else if (Opc == ISD::VECREDUCE_ADD) {
    // Only match reduce.add of a zero-extended i1 vector.
    if (Src.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
    Src = Src.getOperand(0);
  }

  EVT SrcEVT = Src.getValueType();
  if (!SrcEVT.isSimple())
    return SDValue();

  MVT SrcMVT = SrcEVT.getSimpleVT();
  // Make sure the input is an i1 vector.
  if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
    return SDValue();

  // See the FIXME above: pre-type-legalization matching requires the source
  // type to already be legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(SrcMVT))
    return SDValue();

  // Check that destination type is large enough to hold result without
  // overflow.
  if (Opc == ISD::VECREDUCE_ADD) {
    unsigned EltSize = SrcMVT.getScalarSizeInBits();
    unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
    unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
    unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
                            ? SrcMVT.getVectorNumElements()
                              VectorBitsMax, EltSize, MinSize);
    if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
      return SDValue();
  }

  // vcpop.m operates on scalable masks; move a fixed-length vector into its
  // scalable container first.
  MVT ContainerVT = SrcMVT;
  if (SrcMVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDLoc DL(N);
  auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);

  // VCPOP_VL yields an XLenVT count; adjust it to the combine's result type.
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
  return DAG.getZExtOrTrunc(Pop, DL, VT);
}
21313
                                 const RISCVSubtarget &Subtarget) {
  // (shl (zext x), y) -> (vwsll x, y)
  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
    return V;

  // (shl (sext x), C) -> (vwmulsu x, 1u << C)
  // (shl (zext x), C) -> (vwmulu x, 1u << C)

  // The widening-multiply forms are only created after DAG legalization.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // The extend must be single-use so it can be absorbed into the widening
  // multiply.
  SDValue LHS = N->getOperand(0);
  if (!LHS.hasOneUse())
    return SDValue();
  unsigned Opcode;
  switch (LHS.getOpcode()) {
  case ISD::SIGN_EXTEND:
  case RISCVISD::VSEXT_VL:
    Opcode = RISCVISD::VWMULSU_VL;
    break;
  case ISD::ZERO_EXTEND:
  case RISCVISD::VZEXT_VL:
    Opcode = RISCVISD::VWMULU_VL;
    break;
  default:
    return SDValue();
  }

  // The shift amount must be a constant: either a splat build_vector or a
  // VMV_V_X_VL of a scalar constant.
  SDValue RHS = N->getOperand(1);
  APInt ShAmt;
  uint64_t ShAmtInt;
  if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
    ShAmtInt = ShAmt.getZExtValue();
  else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
           RHS.getOperand(1).getOpcode() == ISD::Constant)
    ShAmtInt = RHS.getConstantOperandVal(1);
  else
    return SDValue();

  // Better foldings:
  // (shl (sext x), 1) -> (vwadd x, x)
  // (shl (zext x), 1) -> (vwaddu x, x)
  if (ShAmtInt <= 1)
    return SDValue();

  // The multiplier (1 << ShAmt) must fit in the narrow element type, and
  // the result type must be exactly twice the narrow width.
  SDValue NarrowOp = LHS.getOperand(0);
  MVT NarrowVT = NarrowOp.getSimpleValueType();
  uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
  if (ShAmtInt >= NarrowBits)
    return SDValue();
  MVT VT = N->getSimpleValueType(0);
  if (NarrowBits * 2 != VT.getScalarSizeInBits())
    return SDValue();

  // Collect passthru/mask/VL: defaults for a plain ISD::SHL, explicit
  // operands for SHL_VL.
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Passthru, Mask, VL;
  switch (N->getOpcode()) {
  case ISD::SHL:
    Passthru = DAG.getUNDEF(VT);
    std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
    break;
  case RISCVISD::SHL_VL:
    Passthru = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    break;
  default:
    llvm_unreachable("Expected SHL");
  }
  return DAG.getNode(Opcode, DL, VT, NarrowOp,
                     DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
                     Passthru, Mask, VL);
}
21390
21392 DAGCombinerInfo &DCI) const {
21393 SelectionDAG &DAG = DCI.DAG;
21394 const MVT XLenVT = Subtarget.getXLenVT();
21395 SDLoc DL(N);
21396
21397 // Helper to call SimplifyDemandedBits on an operand of N where only some low
21398 // bits are demanded. N will be added to the Worklist if it was not deleted.
21399 // Caller should return SDValue(N, 0) if this returns true.
21400 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
21401 SDValue Op = N->getOperand(OpNo);
21402 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
21403 if (!SimplifyDemandedBits(Op, Mask, DCI))
21404 return false;
21405
21406 if (N->getOpcode() != ISD::DELETED_NODE)
21407 DCI.AddToWorklist(N);
21408 return true;
21409 };
21410
21411 switch (N->getOpcode()) {
21412 default:
21413 break;
21414 case RISCVISD::SplitF64: {
21415 SDValue Op0 = N->getOperand(0);
21416 // If the input to SplitF64 is just BuildPairF64 then the operation is
21417 // redundant. Instead, use BuildPairF64's operands directly.
21418 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
21419 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
21420
21421 if (Op0->isUndef()) {
21422 SDValue Lo = DAG.getUNDEF(MVT::i32);
21423 SDValue Hi = DAG.getUNDEF(MVT::i32);
21424 return DCI.CombineTo(N, Lo, Hi);
21425 }
21426
21427 // It's cheaper to materialise two 32-bit integers than to load a double
21428 // from the constant pool and transfer it to integer registers through the
21429 // stack.
21431 APInt V = C->getValueAPF().bitcastToAPInt();
21432 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
21433 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
21434 return DCI.CombineTo(N, Lo, Hi);
21435 }
21436
21437 // This is a target-specific version of a DAGCombine performed in
21438 // DAGCombiner::visitBITCAST. It performs the equivalent of:
21439 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
21440 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
21441 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
21442 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
21443 break;
21444 SDValue NewSplitF64 =
21445 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
21446 Op0.getOperand(0));
21447 SDValue Lo = NewSplitF64.getValue(0);
21448 SDValue Hi = NewSplitF64.getValue(1);
21449 APInt SignBit = APInt::getSignMask(32);
21450 if (Op0.getOpcode() == ISD::FNEG) {
21451 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
21452 DAG.getConstant(SignBit, DL, MVT::i32));
21453 return DCI.CombineTo(N, Lo, NewHi);
21454 }
21455 assert(Op0.getOpcode() == ISD::FABS);
21456 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
21457 DAG.getConstant(~SignBit, DL, MVT::i32));
21458 return DCI.CombineTo(N, Lo, NewHi);
21459 }
21460 case RISCVISD::SLLW:
21461 case RISCVISD::SRAW:
21462 case RISCVISD::SRLW:
21463 case RISCVISD::RORW:
21464 case RISCVISD::ROLW: {
21465 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
21466 if (SimplifyDemandedLowBitsHelper(0, 32) ||
21467 SimplifyDemandedLowBitsHelper(1, 5))
21468 return SDValue(N, 0);
21469
21470 break;
21471 }
21472 case RISCVISD::ABSW:
21473 case RISCVISD::CLSW:
21474 case RISCVISD::CLZW:
21475 case RISCVISD::CTZW: {
21476 // Only the lower 32 bits of the first operand are read
21477 if (SimplifyDemandedLowBitsHelper(0, 32))
21478 return SDValue(N, 0);
21479 break;
21480 }
21481 case RISCVISD::WMULSU: {
21482 // Convert to MULHSU if only the upper half is used.
21483 if (!N->hasAnyUseOfValue(0)) {
21484 SDValue Res = DAG.getNode(RISCVISD::MULHSU, DL, N->getValueType(1),
21485 N->getOperand(0), N->getOperand(1));
21486 return DCI.CombineTo(N, Res, Res);
21487 }
21488 break;
21489 }
21490 case RISCVISD::ADDD: {
21491 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21492 "ADDD is only for RV32 with P extension");
21493
21494 SDValue Op0Lo = N->getOperand(0);
21495 SDValue Op0Hi = N->getOperand(1);
21496 SDValue Op1Lo = N->getOperand(2);
21497 SDValue Op1Hi = N->getOperand(3);
21498
21499 // (ADDD lo, hi, x, 0) -> (WADDAU lo, hi, x, 0)
21500 if (isNullConstant(Op1Hi)) {
21501 SDValue Result =
21502 DAG.getNode(RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21503 Op0Lo, Op0Hi, Op1Lo, DAG.getConstant(0, DL, MVT::i32));
21504 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21505 }
21506 // (ADDD x, 0, lo, hi) -> (WADDAU lo, hi, x, 0)
21507 if (isNullConstant(Op0Hi)) {
21508 SDValue Result =
21509 DAG.getNode(RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21510 Op1Lo, Op1Hi, Op0Lo, DAG.getConstant(0, DL, MVT::i32));
21511 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21512 }
21513 break;
21514 }
21515 case RISCVISD::SUBD: {
21516 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21517 "SUBD is only for RV32 with P extension");
21518
21519 SDValue Op0Lo = N->getOperand(0);
21520 SDValue Op0Hi = N->getOperand(1);
21521 SDValue Op1Lo = N->getOperand(2);
21522 SDValue Op1Hi = N->getOperand(3);
21523
21524 // (SUBD lo, hi, x, 0) -> (WSUBAU lo, hi, 0, x)
21525 // WSUBAU semantics: rd = rd + zext(rs1) - zext(rs2)
21526 if (isNullConstant(Op1Hi)) {
21527 SDValue Result =
21528 DAG.getNode(RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21529 Op0Lo, Op0Hi, DAG.getConstant(0, DL, MVT::i32), Op1Lo);
21530 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21531 }
21532 break;
21533 }
21534 case RISCVISD::WADDAU: {
21535 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21536 "WADDAU is only for RV32 with P extension");
21537 SDValue Op0Lo = N->getOperand(0);
21538 SDValue Op0Hi = N->getOperand(1);
21539 SDValue Op1 = N->getOperand(2);
21540 SDValue Op2 = N->getOperand(3);
21541
21542 // FIXME: Canonicalize zero Op1 to Op2.
21543 if (isNullConstant(Op2) && Op0Lo.getNode() == Op0Hi.getNode() &&
21544 Op0Lo.getResNo() == 0 && Op0Hi.getResNo() == 1 && Op0Lo.hasOneUse() &&
21545 Op0Hi.hasOneUse()) {
21546 // (WADDAU (WADDAU lo, hi, x, 0), y, 0) -> (WADDAU lo, hi, x, y)
21547 if (Op0Lo.getOpcode() == RISCVISD::WADDAU &&
21548 isNullConstant(Op0Lo.getOperand(3))) {
21549 SDValue Result = DAG.getNode(
21550 RISCVISD::WADDAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21551 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op0Lo.getOperand(2), Op1);
21552 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21553 }
21554 // (WADDAU (WSUBAU lo, hi, 0, a), b, 0) -> (WSUBAU lo, hi, b, a)
21555 if (Op0Lo.getOpcode() == RISCVISD::WSUBAU &&
21556 isNullConstant(Op0Lo.getOperand(2))) {
21557 SDValue Result = DAG.getNode(
21558 RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21559 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op1, Op0Lo.getOperand(3));
21560 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21561 }
21562 }
21563 break;
21564 }
21565 case RISCVISD::WSUBAU: {
21566 assert(!Subtarget.is64Bit() && Subtarget.hasStdExtP() &&
21567 "WSUBAU is only for RV32 with P extension");
21568 SDValue Op0Lo = N->getOperand(0);
21569 SDValue Op0Hi = N->getOperand(1);
21570 SDValue Op1 = N->getOperand(2);
21571 SDValue Op2 = N->getOperand(3);
21572
21573 // (WSUBAU (WADDAU lo, hi, a, 0), 0, b) -> (WSUBAU lo, hi, a, b)
21574 if (isNullConstant(Op1) && Op0Lo.getOpcode() == RISCVISD::WADDAU &&
21575 Op0Lo.getNode() == Op0Hi.getNode() && Op0Lo.getResNo() == 0 &&
21576 Op0Hi.getResNo() == 1 && Op0Lo.hasOneUse() && Op0Hi.hasOneUse() &&
21577 isNullConstant(Op0Lo.getOperand(3))) {
21578 SDValue Result = DAG.getNode(
21579 RISCVISD::WSUBAU, DL, DAG.getVTList(MVT::i32, MVT::i32),
21580 Op0Lo.getOperand(0), Op0Lo.getOperand(1), Op0Lo.getOperand(2), Op2);
21581 return DCI.CombineTo(N, Result.getValue(0), Result.getValue(1));
21582 }
21583 break;
21584 }
21585 case RISCVISD::FMV_W_X_RV64: {
21586 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the
21587 // conversion is unnecessary and can be replaced with the
21588 // FMV_X_ANYEXTW_RV64 operand.
21589 SDValue Op0 = N->getOperand(0);
21590 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
21591 return Op0.getOperand(0);
21592 break;
21593 }
21594 case RISCVISD::FMV_X_ANYEXTH:
21595 case RISCVISD::FMV_X_ANYEXTW_RV64: {
21596 SDLoc DL(N);
21597 SDValue Op0 = N->getOperand(0);
21598 MVT VT = N->getSimpleValueType(0);
21599
21600 // Constant fold.
21601 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
21602 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
21603 return DAG.getConstant(Val, DL, VT);
21604 }
21605
21606 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
21607 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
21608 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
21609 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
21610 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
21611 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
21612 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
21613 assert(Op0.getOperand(0).getValueType() == VT &&
21614 "Unexpected value type!");
21615 return Op0.getOperand(0);
21616 }
21617
21618 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
21619 cast<LoadSDNode>(Op0)->isSimple()) {
21621 auto *LN0 = cast<LoadSDNode>(Op0);
21622 SDValue Load =
21623 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
21624 LN0->getBasePtr(), IVT, LN0->getMemOperand());
21625 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
21626 return Load;
21627 }
21628
21629 // This is a target-specific version of a DAGCombine performed in
21630 // DAGCombiner::visitBITCAST. It performs the equivalent of:
21631 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
21632 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
21633 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
21634 !Op0.getNode()->hasOneUse())
21635 break;
21636 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
21637 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
21638 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
21639 if (Op0.getOpcode() == ISD::FNEG)
21640 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
21641 DAG.getConstant(SignBit, DL, VT));
21642
21643 assert(Op0.getOpcode() == ISD::FABS);
21644 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
21645 DAG.getConstant(~SignBit, DL, VT));
21646 }
21647 case ISD::ABS: {
21648 EVT VT = N->getValueType(0);
21649 SDValue N0 = N->getOperand(0);
21650 // abs (sext) -> zext (abs)
21651 // abs (zext) -> zext (handled elsewhere)
21652 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
21653 SDValue Src = N0.getOperand(0);
21654 SDLoc DL(N);
21655 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
21656 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
21657 }
21658 break;
21659 }
21660 case ISD::ADD: {
21661 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21662 return V;
21663 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
21664 return V;
21665 if (SDValue V = combineVdota4Accum(N, DAG, Subtarget))
21666 return V;
21667 return performADDCombine(N, DCI, Subtarget);
21668 }
21669 case ISD::SUB: {
21670 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21671 return V;
21672 return performSUBCombine(N, DAG, Subtarget);
21673 }
21674 case ISD::AND:
21675 return performANDCombine(N, DCI, Subtarget);
21676 case ISD::OR: {
21677 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21678 return V;
21679 return performORCombine(N, DCI, Subtarget);
21680 }
21681 case ISD::XOR:
21682 return performXORCombine(N, DAG, Subtarget);
21683 case ISD::MUL:
21684 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21685 return V;
21686 return performMULCombine(N, DAG, DCI, Subtarget);
21687 case ISD::SDIV:
21688 case ISD::UDIV:
21689 case ISD::SREM:
21690 case ISD::UREM:
21691 if (SDValue V = combineBinOpOfZExt(N, DAG))
21692 return V;
21693 break;
21694 case ISD::FMUL: {
21695 using namespace SDPatternMatch;
21696 SDLoc DL(N);
21697 EVT VT = N->getValueType(0);
21698 SDValue X, Y;
21699 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
21700 // hoistFNegAboveFMulFDiv.
21701 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
21703 return DAG.getNode(ISD::FNEG, DL, VT,
21704 DAG.getNode(ISD::FMUL, DL, VT, X, Y, N->getFlags()),
21705 N->getFlags());
21706
21707 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
21708 SDValue N0 = N->getOperand(0);
21709 SDValue N1 = N->getOperand(1);
21710 if (N0->getOpcode() != ISD::FCOPYSIGN)
21711 std::swap(N0, N1);
21712 if (N0->getOpcode() != ISD::FCOPYSIGN)
21713 return SDValue();
21715 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
21716 return SDValue();
21717 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
21718 return SDValue();
21719 SDValue Sign = N0->getOperand(1);
21720 if (Sign.getValueType() != VT)
21721 return SDValue();
21722 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
21723 }
21724 case ISD::FADD:
21725 case ISD::UMAX:
21726 case ISD::UMIN:
21727 case ISD::SMAX:
21728 case ISD::SMIN:
21729 case ISD::FMAXNUM:
21730 case ISD::FMINNUM: {
21731 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
21732 return V;
21733 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
21734 return V;
21735 return SDValue();
21736 }
21737 case ISD::FMA: {
21738 SDValue N0 = N->getOperand(0);
21739 SDValue N1 = N->getOperand(1);
21740 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21741 std::swap(N0, N1);
21742 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21743 return SDValue();
21744 SDValue SplatN0 = N0.getOperand(0);
21745 if (SplatN0.getOpcode() != ISD::FNEG || !SplatN0.hasOneUse())
21746 return SDValue();
21747 EVT VT = N->getValueType(0);
21748 SDValue Splat =
21749 DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, SplatN0.getOperand(0));
21750 SDValue Fneg = DAG.getNode(ISD::FNEG, DL, VT, Splat);
21751 return DAG.getNode(ISD::FMA, DL, VT, Fneg, N1, N->getOperand(2));
21752 }
21753 case ISD::SETCC:
21754 return performSETCCCombine(N, DCI, Subtarget);
21756 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
21757 case ISD::ZERO_EXTEND:
21758 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
21759 // type legalization. This is safe because fp_to_uint produces poison if
21760 // it overflows.
21761 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
21762 SDValue Src = N->getOperand(0);
21763 if (Src.getOpcode() == ISD::FP_TO_UINT &&
21764 isTypeLegal(Src.getOperand(0).getValueType()))
21765 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
21766 Src.getOperand(0));
21767 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
21768 isTypeLegal(Src.getOperand(1).getValueType())) {
21769 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
21770 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
21771 Src.getOperand(0), Src.getOperand(1));
21772 DCI.CombineTo(N, Res);
21773 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
21774 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
21775 return SDValue(N, 0); // Return N so it doesn't get rechecked.
21776 }
21777 }
21778 return SDValue();
21779 case RISCVISD::TRUNCATE_VECTOR_VL:
21780 if (SDValue V = combineTruncOfSraSext(N, DAG))
21781 return V;
21782 return combineTruncToVnclip(N, DAG, Subtarget);
21783 case ISD::VP_TRUNCATE:
21784 return performVP_TRUNCATECombine(N, DAG, Subtarget);
21785 case ISD::TRUNCATE:
21786 return performTRUNCATECombine(N, DAG, Subtarget);
21787 case ISD::SELECT:
21788 return performSELECTCombine(N, DAG, Subtarget);
21789 case ISD::VSELECT:
21790 return performVSELECTCombine(N, DAG);
21791 case RISCVISD::CZERO_EQZ:
21792 case RISCVISD::CZERO_NEZ: {
21793 SDValue Val = N->getOperand(0);
21794 SDValue Cond = N->getOperand(1);
21795 MVT VT = N->getSimpleValueType(0);
21796
21797 unsigned Opc = N->getOpcode();
21798
21799 // czero_eqz x, x -> x
21800 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
21801 return Val;
21802
21803 unsigned InvOpc =
21804 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
21805
21806 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
21807 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
21808 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
21809 SDValue NewCond = Cond.getOperand(0);
21810 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
21811 if (DAG.MaskedValueIsZero(NewCond, Mask))
21812 return DAG.getNode(InvOpc, SDLoc(N), VT, Val, NewCond);
21813 }
21814 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
21815 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
21816 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
21817 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
21818 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
21819 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
21820 if (ISD::isIntEqualitySetCC(CCVal))
21821 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N), VT,
21822 Val, Cond.getOperand(0));
21823 }
21824
21825 // Remove SRL from bittest patterns (srl (and X, (1 << C)), C) if the and
21826 // is an ANDI. Because only 1 bit can be set after the AND, it doesn't
21827 // matter if we shift it.
21828 if (Cond.getOpcode() == ISD::SRL &&
21829 isa<ConstantSDNode>(Cond.getOperand(1)) &&
21830 Cond.getOperand(0).getOpcode() == ISD::AND) {
21831 const APInt &ShAmt = Cond.getConstantOperandAPInt(1);
21832 unsigned BitWidth = VT.getSizeInBits();
21833 SDValue And = Cond.getOperand(0);
21834 if (ShAmt.ult(BitWidth) && isa<ConstantSDNode>(And.getOperand(1))) {
21835 uint64_t AndConst = And.getConstantOperandVal(1);
21836 if (AndConst == (1ULL << ShAmt.getZExtValue()) && isInt<12>(AndConst))
21837 return DAG.getNode(Opc, DL, VT, Val, And);
21838 }
21839 }
21840
21841 // czero_nez (setcc X, Y, CC), (setcc X, Y, eq) -> (setcc X, Y, CC)
21842 // if CC is a strict inequality (lt, gt, ult, ugt), because when X == Y
21843 // the setcc result is already 0. The eq operands can be in either order.
21844 if (Opc == RISCVISD::CZERO_NEZ && Val.getOpcode() == ISD::SETCC &&
21845 Cond.getOpcode() == ISD::SETCC &&
21846 cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ) {
21847 ISD::CondCode ValCC = cast<CondCodeSDNode>(Val.getOperand(2))->get();
21848 bool SameOperands = (Val.getOperand(0) == Cond.getOperand(0) &&
21849 Val.getOperand(1) == Cond.getOperand(1)) ||
21850 (Val.getOperand(0) == Cond.getOperand(1) &&
21851 Val.getOperand(1) == Cond.getOperand(0));
21852 if (SameOperands && (ValCC == ISD::SETLT || ValCC == ISD::SETGT ||
21853 ValCC == ISD::SETULT || ValCC == ISD::SETUGT))
21854 return Val;
21855 }
21856
21857 return SDValue();
21858 }
21859 case RISCVISD::SELECT_CC: {
21860 // Transform
21861 SDValue LHS = N->getOperand(0);
21862 SDValue RHS = N->getOperand(1);
21863 SDValue CC = N->getOperand(2);
21864 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
21865 SDValue TrueV = N->getOperand(3);
21866 SDValue FalseV = N->getOperand(4);
21867 SDLoc DL(N);
21868 EVT VT = N->getValueType(0);
21869
21870 // If the True and False values are the same, we don't need a select_cc.
21871 if (TrueV == FalseV)
21872 return TrueV;
21873
21874 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
21875 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
21876 if (!Subtarget.hasShortForwardBranchIALU() && isa<ConstantSDNode>(TrueV) &&
21877 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
21878 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
21879 if (CCVal == ISD::CondCode::SETGE)
21880 std::swap(TrueV, FalseV);
21881
21882 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
21883 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
21884 // Only handle simm12, if it is not in this range, it can be considered as
21885 // register.
21886 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
21887 isInt<12>(TrueSImm - FalseSImm)) {
21888 SDValue SRA =
21889 DAG.getNode(ISD::SRA, DL, VT, LHS,
21890 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
21891 SDValue AND =
21892 DAG.getNode(ISD::AND, DL, VT, SRA,
21893 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
21894 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
21895 }
21896
21897 if (CCVal == ISD::CondCode::SETGE)
21898 std::swap(TrueV, FalseV);
21899 }
21900
21901 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21902 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
21903 {LHS, RHS, CC, TrueV, FalseV});
21904
21905 if (!Subtarget.hasConditionalMoveFusion()) {
21906 // (select c, -1, y) -> -c | y
21907 if (isAllOnesConstant(TrueV)) {
21908 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21909 SDValue Neg = DAG.getNegative(C, DL, VT);
21910 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
21911 }
21912 // (select c, y, -1) -> -!c | y
21913 if (isAllOnesConstant(FalseV)) {
21914 SDValue C =
21915 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21916 SDValue Neg = DAG.getNegative(C, DL, VT);
21917 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
21918 }
21919
21920 // (select c, 0, y) -> -!c & y
21921 if (isNullConstant(TrueV)) {
21922 SDValue C =
21923 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21924 SDValue Neg = DAG.getNegative(C, DL, VT);
21925 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
21926 }
21927 // (select c, y, 0) -> -c & y
21928 if (isNullConstant(FalseV)) {
21929 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21930 SDValue Neg = DAG.getNegative(C, DL, VT);
21931 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
21932 }
21933 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
21934 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
21935 if (((isOneConstant(FalseV) && LHS == TrueV &&
21936 CCVal == ISD::CondCode::SETNE) ||
21937 (isOneConstant(TrueV) && LHS == FalseV &&
21938 CCVal == ISD::CondCode::SETEQ)) &&
21939 isNullConstant(RHS)) {
21940 // freeze it to be safe.
21941 LHS = DAG.getFreeze(LHS);
21942 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
21943 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
21944 }
21945 }
21946
21947 // If both true/false are an xor with 1, pull through the select.
21948 // This can occur after op legalization if both operands are setccs that
21949 // require an xor to invert.
21950 // FIXME: Generalize to other binary ops with identical operand?
21951 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
21952 TrueV.getOperand(1) == FalseV.getOperand(1) &&
21953 isOneConstant(TrueV.getOperand(1)) &&
21954 TrueV.hasOneUse() && FalseV.hasOneUse()) {
21955 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
21956 TrueV.getOperand(0), FalseV.getOperand(0));
21957 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
21958 }
21959
21960 return SDValue();
21961 }
21962 case RISCVISD::BR_CC: {
21963 SDValue LHS = N->getOperand(1);
21964 SDValue RHS = N->getOperand(2);
21965 SDValue CC = N->getOperand(3);
21966 SDLoc DL(N);
21967
21968 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21969 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
21970 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
21971
21972 return SDValue();
21973 }
21974 case ISD::BITREVERSE:
21975 return performBITREVERSECombine(N, DAG, Subtarget);
21976 case ISD::FP_TO_SINT:
21977 case ISD::FP_TO_UINT:
21978 return performFP_TO_INTCombine(N, DCI, Subtarget);
21981 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
21982 case ISD::FCOPYSIGN: {
21983 EVT VT = N->getValueType(0);
21984 if (!VT.isVector())
21985 break;
21986 // There is a form of VFSGNJ which injects the negated sign of its second
21987 // operand. Try and bubble any FNEG up after the extend/round to produce
21988 // this optimized pattern. Avoid modifying cases where FP_ROUND and
21989 // TRUNC=1.
21990 SDValue In2 = N->getOperand(1);
21991 // Avoid cases where the extend/round has multiple uses, as duplicating
21992 // those is typically more expensive than removing a fneg.
21993 if (!In2.hasOneUse())
21994 break;
21995 if (In2.getOpcode() != ISD::FP_EXTEND &&
21996 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
21997 break;
21998 In2 = In2.getOperand(0);
21999 if (In2.getOpcode() != ISD::FNEG)
22000 break;
22001 SDLoc DL(N);
22002 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
22003 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
22004 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
22005 }
22006 case ISD::MGATHER: {
22007 const auto *MGN = cast<MaskedGatherSDNode>(N);
22008 const EVT VT = N->getValueType(0);
22009 SDValue Index = MGN->getIndex();
22010 SDValue ScaleOp = MGN->getScale();
22011 ISD::MemIndexType IndexType = MGN->getIndexType();
22012 assert(!MGN->isIndexScaled() &&
22013 "Scaled gather/scatter should not be formed");
22014
22015 SDLoc DL(N);
22016 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22017 return DAG.getMaskedGather(
22018 N->getVTList(), MGN->getMemoryVT(), DL,
22019 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
22020 MGN->getBasePtr(), Index, ScaleOp},
22021 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
22022
22023 if (narrowIndex(Index, IndexType, DAG))
22024 return DAG.getMaskedGather(
22025 N->getVTList(), MGN->getMemoryVT(), DL,
22026 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
22027 MGN->getBasePtr(), Index, ScaleOp},
22028 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
22029
22030 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
22031 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
22032 // The sequence will be XLenVT, not the type of Index. Tell
22033 // isSimpleVIDSequence this so we avoid overflow.
22034 if (std::optional<VIDSequence> SimpleVID =
22035 isSimpleVIDSequence(Index, Subtarget.getXLen());
22036 SimpleVID && SimpleVID->StepDenominator == 1) {
22037 const int64_t StepNumerator = SimpleVID->StepNumerator;
22038 const int64_t Addend = SimpleVID->Addend;
22039
22040 // Note: We don't need to check alignment here since (by assumption
22041 // from the existence of the gather), our offsets must be sufficiently
22042 // aligned.
22043
22044 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
22045 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
22046 assert(IndexType == ISD::UNSIGNED_SCALED);
22047 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
22048 DAG.getSignedConstant(Addend, DL, PtrVT));
22049
22050 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
22052 SDValue StridedLoad = DAG.getStridedLoadVP(
22053 VT, DL, MGN->getChain(), BasePtr,
22054 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
22055 EVL, MGN->getMemOperand());
22056 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
22057 MGN->getPassThru());
22058 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
22059 DL);
22060 }
22061 }
22062
22063 SmallVector<int> ShuffleMask;
22064 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
22065 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
22066 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
22067 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
22068 MGN->getMask(), DAG.getUNDEF(VT),
22069 MGN->getMemoryVT(), MGN->getMemOperand(),
22071 SDValue Shuffle =
22072 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
22073 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
22074 }
22075
22076 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
22077 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
22078 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
22079 SmallVector<SDValue> NewIndices;
22080 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
22081 NewIndices.push_back(Index.getOperand(i));
22082 EVT IndexVT = Index.getValueType()
22084 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
22085
22086 unsigned ElementSize = VT.getScalarStoreSize();
22087 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
22088 auto EltCnt = VT.getVectorElementCount();
22089 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
22090 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
22091 EltCnt.divideCoefficientBy(2));
22092 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
22093 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
22094 EltCnt.divideCoefficientBy(2));
22095 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
22096
22097 SDValue Gather =
22098 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
22099 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
22100 Index, ScaleOp},
22101 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
22102 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
22103 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
22104 }
22105 break;
22106 }
22107 case ISD::MSCATTER:{
22108 const auto *MSN = cast<MaskedScatterSDNode>(N);
22109 SDValue Index = MSN->getIndex();
22110 SDValue ScaleOp = MSN->getScale();
22111 ISD::MemIndexType IndexType = MSN->getIndexType();
22112 assert(!MSN->isIndexScaled() &&
22113 "Scaled gather/scatter should not be formed");
22114
22115 SDLoc DL(N);
22116 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22117 return DAG.getMaskedScatter(
22118 N->getVTList(), MSN->getMemoryVT(), DL,
22119 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
22120 Index, ScaleOp},
22121 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
22122
22123 if (narrowIndex(Index, IndexType, DAG))
22124 return DAG.getMaskedScatter(
22125 N->getVTList(), MSN->getMemoryVT(), DL,
22126 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
22127 Index, ScaleOp},
22128 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
22129
22130 EVT VT = MSN->getValue()->getValueType(0);
22131 SmallVector<int> ShuffleMask;
22132 if (!MSN->isTruncatingStore() &&
22133 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
22134 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
22135 DAG.getUNDEF(VT), ShuffleMask);
22136 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
22137 DAG.getUNDEF(XLenVT), MSN->getMask(),
22138 MSN->getMemoryVT(), MSN->getMemOperand(),
22139 ISD::UNINDEXED, false);
22140 }
22141 break;
22142 }
22143 case ISD::VP_GATHER: {
22144 const auto *VPGN = cast<VPGatherSDNode>(N);
22145 SDValue Index = VPGN->getIndex();
22146 SDValue ScaleOp = VPGN->getScale();
22147 ISD::MemIndexType IndexType = VPGN->getIndexType();
22148 assert(!VPGN->isIndexScaled() &&
22149 "Scaled gather/scatter should not be formed");
22150
22151 SDLoc DL(N);
22152 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22153 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
22154 {VPGN->getChain(), VPGN->getBasePtr(), Index,
22155 ScaleOp, VPGN->getMask(),
22156 VPGN->getVectorLength()},
22157 VPGN->getMemOperand(), IndexType);
22158
22159 if (narrowIndex(Index, IndexType, DAG))
22160 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
22161 {VPGN->getChain(), VPGN->getBasePtr(), Index,
22162 ScaleOp, VPGN->getMask(),
22163 VPGN->getVectorLength()},
22164 VPGN->getMemOperand(), IndexType);
22165
22166 break;
22167 }
22168 case ISD::VP_SCATTER: {
22169 const auto *VPSN = cast<VPScatterSDNode>(N);
22170 SDValue Index = VPSN->getIndex();
22171 SDValue ScaleOp = VPSN->getScale();
22172 ISD::MemIndexType IndexType = VPSN->getIndexType();
22173 assert(!VPSN->isIndexScaled() &&
22174 "Scaled gather/scatter should not be formed");
22175
22176 SDLoc DL(N);
22177 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
22178 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
22179 {VPSN->getChain(), VPSN->getValue(),
22180 VPSN->getBasePtr(), Index, ScaleOp,
22181 VPSN->getMask(), VPSN->getVectorLength()},
22182 VPSN->getMemOperand(), IndexType);
22183
22184 if (narrowIndex(Index, IndexType, DAG))
22185 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
22186 {VPSN->getChain(), VPSN->getValue(),
22187 VPSN->getBasePtr(), Index, ScaleOp,
22188 VPSN->getMask(), VPSN->getVectorLength()},
22189 VPSN->getMemOperand(), IndexType);
22190 break;
22191 }
22192 case RISCVISD::SHL_VL:
22193 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
22194 return V;
22195 [[fallthrough]];
22196 case RISCVISD::SRA_VL:
22197 case RISCVISD::SRL_VL: {
22198 SDValue ShAmt = N->getOperand(1);
22199 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
22200 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
22201 SDLoc DL(N);
22202 SDValue VL = N->getOperand(4);
22203 EVT VT = N->getValueType(0);
22204 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
22205 ShAmt.getOperand(1), VL);
22206 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
22207 N->getOperand(2), N->getOperand(3), N->getOperand(4));
22208 }
22209 break;
22210 }
22211 case ISD::SRA:
22212 if (SDValue V = performSRACombine(N, DAG, Subtarget))
22213 return V;
22214 [[fallthrough]];
22215 case ISD::SRL:
22216 case ISD::SHL: {
22217 if (N->getOpcode() == ISD::SHL) {
22218 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
22219 return V;
22220 }
22221 SDValue ShAmt = N->getOperand(1);
22222 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
22223 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
22224 SDLoc DL(N);
22225 EVT VT = N->getValueType(0);
22226 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
22227 ShAmt.getOperand(1),
22228 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
22229 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
22230 }
22231 break;
22232 }
22233 case RISCVISD::ADD_VL:
22234 if (SDValue V = simplifyOp_VL(N))
22235 return V;
22236 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
22237 return V;
22238 if (SDValue V = combineVdota4Accum(N, DAG, Subtarget))
22239 return V;
22240 return combineToVWMACC(N, DAG, Subtarget);
22241 case RISCVISD::VWADDU_VL:
22242 return performVWABDACombine(N, DAG, Subtarget);
22243 case RISCVISD::VWADD_W_VL:
22244 case RISCVISD::VWADDU_W_VL:
22245 case RISCVISD::VWSUB_W_VL:
22246 case RISCVISD::VWSUBU_W_VL:
22247 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
22248 case RISCVISD::OR_VL:
22249 case RISCVISD::SUB_VL:
22250 case RISCVISD::MUL_VL:
22251 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
22252 case RISCVISD::VFMADD_VL:
22253 case RISCVISD::VFNMADD_VL:
22254 case RISCVISD::VFMSUB_VL:
22255 case RISCVISD::VFNMSUB_VL:
22256 case RISCVISD::STRICT_VFMADD_VL:
22257 case RISCVISD::STRICT_VFNMADD_VL:
22258 case RISCVISD::STRICT_VFMSUB_VL:
22259 case RISCVISD::STRICT_VFNMSUB_VL:
22260 return performVFMADD_VLCombine(N, DCI, Subtarget);
22261 case RISCVISD::FADD_VL:
22262 case RISCVISD::FSUB_VL:
22263 case RISCVISD::FMUL_VL:
22264 case RISCVISD::VFWADD_W_VL:
22265 case RISCVISD::VFWSUB_W_VL:
22266 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
22267 case ISD::LOAD:
22268 case ISD::STORE: {
22269 if (DCI.isAfterLegalizeDAG())
22270 if (SDValue V = performMemPairCombine(N, DCI))
22271 return V;
22272
22273 if (N->getOpcode() != ISD::STORE)
22274 break;
22275
22276 auto *Store = cast<StoreSDNode>(N);
22277 SDValue Chain = Store->getChain();
22278 EVT MemVT = Store->getMemoryVT();
22279 SDValue Val = Store->getValue();
22280 SDLoc DL(N);
22281
22282 bool IsScalarizable =
22283 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
22284 Store->isSimple() &&
22285 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
22286 isPowerOf2_64(MemVT.getSizeInBits()) &&
22287 MemVT.getSizeInBits() <= Subtarget.getXLen();
22288
22289 // If sufficiently aligned we can scalarize stores of constant vectors of
22290 // any power-of-two size up to XLen bits, provided that they aren't too
22291 // expensive to materialize.
22292 // vsetivli zero, 2, e8, m1, ta, ma
22293 // vmv.v.i v8, 4
22294 // vse64.v v8, (a0)
22295 // ->
22296 // li a1, 1028
22297 // sh a1, 0(a0)
22298 if (DCI.isBeforeLegalize() && IsScalarizable &&
22300 // Get the constant vector bits
22301 APInt NewC(Val.getValueSizeInBits(), 0);
22302 uint64_t EltSize = Val.getScalarValueSizeInBits();
22303 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
22304 if (Val.getOperand(i).isUndef())
22305 continue;
22306 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
22307 i * EltSize);
22308 }
22309 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
22310
22311 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
22312 true) <= 2 &&
22314 NewVT, *Store->getMemOperand())) {
22315 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
22316 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
22317 Store->getPointerInfo(), Store->getBaseAlign(),
22318 Store->getMemOperand()->getFlags());
22319 }
22320 }
22321
22322 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
22323 // vsetivli zero, 2, e16, m1, ta, ma
22324 // vle16.v v8, (a0)
22325 // vse16.v v8, (a1)
22326 if (auto *L = dyn_cast<LoadSDNode>(Val);
22327 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
22328 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
22329 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
22330 L->getMemoryVT() == MemVT) {
22331 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
22333 NewVT, *Store->getMemOperand()) &&
22335 NewVT, *L->getMemOperand())) {
22336 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
22337 L->getPointerInfo(), L->getBaseAlign(),
22338 L->getMemOperand()->getFlags());
22339 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
22340 Store->getPointerInfo(), Store->getBaseAlign(),
22341 Store->getMemOperand()->getFlags());
22342 }
22343 }
22344
22345 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
22346 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
22347 // any illegal types.
22348 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
22349 (DCI.isAfterLegalizeDAG() &&
22351 isNullConstant(Val.getOperand(1)))) &&
22352 Val.hasOneUse()) {
22353 SDValue Src = Val.getOperand(0);
22354 MVT VecVT = Src.getSimpleValueType();
22355 // VecVT should be scalable and memory VT should match the element type.
22356 if (!Store->isIndexed() && VecVT.isScalableVector() &&
22357 MemVT == VecVT.getVectorElementType()) {
22358 SDLoc DL(N);
22359 MVT MaskVT = getMaskTypeFor(VecVT);
22360 return DAG.getStoreVP(
22361 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
22362 DAG.getConstant(1, DL, MaskVT),
22363 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
22364 Store->getMemOperand(), Store->getAddressingMode(),
22365 Store->isTruncatingStore(), /*IsCompress*/ false);
22366 }
22367 }
22368
22369 break;
22370 }
22371 case ISD::SPLAT_VECTOR: {
22372 EVT VT = N->getValueType(0);
22373 // Only perform this combine on legal MVT types.
22374 if (!isTypeLegal(VT))
22375 break;
22376 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
22377 DAG, Subtarget))
22378 return Gather;
22379 break;
22380 }
22381 case ISD::BUILD_VECTOR:
22382 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
22383 return V;
22384 break;
22386 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
22387 return V;
22388 break;
22390 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
22391 return V;
22392 break;
22394 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
22395 return V;
22396 break;
22397 case RISCVISD::VFMV_V_F_VL: {
22398 const MVT VT = N->getSimpleValueType(0);
22399 SDValue Passthru = N->getOperand(0);
22400 SDValue Scalar = N->getOperand(1);
22401 SDValue VL = N->getOperand(2);
22402
22403 // If VL is 1, we can use vfmv.s.f.
22404 if (isOneConstant(VL))
22405 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
22406 break;
22407 }
22408 case RISCVISD::VMV_V_X_VL: {
22409 const MVT VT = N->getSimpleValueType(0);
22410 SDValue Passthru = N->getOperand(0);
22411 SDValue Scalar = N->getOperand(1);
22412 SDValue VL = N->getOperand(2);
22413
22414 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
22415 // scalar input.
22416 unsigned ScalarSize = Scalar.getValueSizeInBits();
22417 unsigned EltWidth = VT.getScalarSizeInBits();
22418 if (ScalarSize > EltWidth && Passthru.isUndef())
22419 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
22420 return SDValue(N, 0);
22421
22422 // If VL is 1 and the scalar value won't benefit from immediate, we can
22423 // use vmv.s.x.
22425 if (isOneConstant(VL) &&
22426 (!Const || Const->isZero() ||
22427 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
22428 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
22429
22430 break;
22431 }
22432 case RISCVISD::VFMV_S_F_VL: {
22433 SDValue Src = N->getOperand(1);
22434 // Try to remove vector->scalar->vector if the scalar->vector is inserting
22435 // into an undef vector.
22436 // TODO: Could use a vslide or vmv.v.v for non-undef.
22437 if (N->getOperand(0).isUndef() &&
22438 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22439 isNullConstant(Src.getOperand(1)) &&
22440 Src.getOperand(0).getValueType().isScalableVector()) {
22441 EVT VT = N->getValueType(0);
22442 SDValue EVSrc = Src.getOperand(0);
22443 EVT EVSrcVT = EVSrc.getValueType();
22445 // Widths match, just return the original vector.
22446 if (EVSrcVT == VT)
22447 return EVSrc;
22448 SDLoc DL(N);
22449 // Width is narrower, using insert_subvector.
22450 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
22451 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
22452 EVSrc,
22453 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
22454 }
22455 // Width is wider, using extract_subvector.
22456 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
22457 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
22458 }
22459 [[fallthrough]];
22460 }
22461 case RISCVISD::VMV_S_X_VL: {
22462 const MVT VT = N->getSimpleValueType(0);
22463 SDValue Passthru = N->getOperand(0);
22464 SDValue Scalar = N->getOperand(1);
22465 SDValue VL = N->getOperand(2);
22466
22467 // The vmv.s.x instruction copies the scalar integer register to element 0
22468 // of the destination vector register. If SEW < XLEN, the least-significant
22469 // bits are copied and the upper XLEN-SEW bits are ignored.
22470 unsigned ScalarSize = Scalar.getValueSizeInBits();
22471 unsigned EltWidth = VT.getScalarSizeInBits();
22472 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
22473 return SDValue(N, 0);
22474
22475 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
22476 Scalar.getOperand(0).getValueType() == N->getValueType(0))
22477 return Scalar.getOperand(0);
22478
22479 // Use M1 or smaller to avoid over constraining register allocation
22480 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
22481 if (M1VT.bitsLT(VT)) {
22482 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
22483 SDValue Result =
22484 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
22485 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
22486 return Result;
22487 }
22488
22489 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
22490 // higher would involve overly constraining the register allocator for
22491 // no purpose.
22492 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
22493 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
22494 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
22495 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
22496
22497 break;
22498 }
22499 case RISCVISD::VMV_X_S: {
22500 SDValue Vec = N->getOperand(0);
22501 MVT VecVT = N->getOperand(0).getSimpleValueType();
22502 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
22503 if (M1VT.bitsLT(VecVT)) {
22504 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
22505 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getValueType(0), Vec);
22506 }
22507 break;
22508 }
22512 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
22513 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
22514 switch (IntNo) {
22515 // By default we do not combine any intrinsic.
22516 default:
22517 return SDValue();
22518 case Intrinsic::riscv_vcpop:
22519 case Intrinsic::riscv_vcpop_mask:
22520 case Intrinsic::riscv_vfirst:
22521 case Intrinsic::riscv_vfirst_mask: {
22522 SDValue VL = N->getOperand(2);
22523 if (IntNo == Intrinsic::riscv_vcpop_mask ||
22524 IntNo == Intrinsic::riscv_vfirst_mask)
22525 VL = N->getOperand(3);
22526 if (!isNullConstant(VL))
22527 return SDValue();
22528 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
22529 SDLoc DL(N);
22530 EVT VT = N->getValueType(0);
22531 if (IntNo == Intrinsic::riscv_vfirst ||
22532 IntNo == Intrinsic::riscv_vfirst_mask)
22533 return DAG.getAllOnesConstant(DL, VT);
22534 return DAG.getConstant(0, DL, VT);
22535 }
22536 case Intrinsic::riscv_vsseg2_mask:
22537 case Intrinsic::riscv_vsseg3_mask:
22538 case Intrinsic::riscv_vsseg4_mask:
22539 case Intrinsic::riscv_vsseg5_mask:
22540 case Intrinsic::riscv_vsseg6_mask:
22541 case Intrinsic::riscv_vsseg7_mask:
22542 case Intrinsic::riscv_vsseg8_mask: {
22543 SDValue Tuple = N->getOperand(2);
22544 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
22545
22546 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
22547 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
22548 !Tuple.getOperand(0).isUndef())
22549 return SDValue();
22550
22551 SDValue Val = Tuple.getOperand(1);
22552 unsigned Idx = Tuple.getConstantOperandVal(2);
22553
22554 unsigned SEW = Val.getValueType().getScalarSizeInBits();
22555 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
22556 "Type mismatch without bitcast?");
22557 unsigned Stride = SEW / 8 * NF;
22558 unsigned Offset = SEW / 8 * Idx;
22559
22560 SDValue Ops[] = {
22561 /*Chain=*/N->getOperand(0),
22562 /*IntID=*/
22563 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
22564 /*StoredVal=*/Val,
22565 /*Ptr=*/
22566 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
22567 DAG.getConstant(Offset, DL, XLenVT)),
22568 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
22569 /*Mask=*/N->getOperand(4),
22570 /*VL=*/N->getOperand(5)};
22571
22572 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
22573 // Match getTgtMemIntrinsic for non-unit stride case
22574 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
22577 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
22578
22579 SDVTList VTs = DAG.getVTList(MVT::Other);
22580 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
22581 MMO);
22582 }
22583 }
22584 }
22585 case ISD::EXPERIMENTAL_VP_REVERSE:
22586 return performVP_REVERSECombine(N, DAG, Subtarget);
22587 case ISD::VP_STORE:
22588 return performVP_STORECombine(N, DAG, Subtarget);
22589 case ISD::BITCAST: {
22590 assert(Subtarget.useRVVForFixedLengthVectors());
22591 SDValue N0 = N->getOperand(0);
22592 EVT VT = N->getValueType(0);
22593 EVT SrcVT = N0.getValueType();
22594 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
22595 unsigned NF = VT.getRISCVVectorTupleNumFields();
22596 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
22597 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
22598 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
22599
22600 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
22601
22602 SDValue Result = DAG.getUNDEF(VT);
22603 for (unsigned i = 0; i < NF; ++i)
22604 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
22605 DAG.getTargetConstant(i, DL, MVT::i32));
22606 return Result;
22607 }
22608 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
22609 // type, widen both sides to avoid a trip through memory.
22610 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
22611 VT.isScalarInteger()) {
22612 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
22613 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
22614 Ops[0] = N0;
22615 SDLoc DL(N);
22616 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
22617 N0 = DAG.getBitcast(MVT::i8, N0);
22618 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
22619 }
22620
22621 return SDValue();
22622 }
22623 case ISD::VECREDUCE_ADD:
22624 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
22625 return V;
22626 [[fallthrough]];
22627 case ISD::CTPOP:
22628 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
22629 return V;
22630 break;
22631 case RISCVISD::VRGATHER_VX_VL: {
22632 // Note this assumes that out of bounds indices produce poison
22633 // and can thus be replaced without having to prove them inbounds..
22634 EVT VT = N->getValueType(0);
22635 SDValue Src = N->getOperand(0);
22636 SDValue Idx = N->getOperand(1);
22637 SDValue Passthru = N->getOperand(2);
22638 SDValue VL = N->getOperand(4);
22639
22640 // Warning: Unlike most cases we strip an insert_subvector, this one
22641 // does not require the first operand to be undef.
22642 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
22643 isNullConstant(Src.getOperand(2)))
22644 Src = Src.getOperand(1);
22645
22646 switch (Src.getOpcode()) {
22647 default:
22648 break;
22649 case RISCVISD::VMV_V_X_VL:
22650 case RISCVISD::VFMV_V_F_VL:
22651 // Drop a redundant vrgather_vx.
22652 // TODO: Remove the type restriction if we find a motivating
22653 // test case?
22654 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
22655 Src.getValueType() == VT)
22656 return Src;
22657 break;
22658 case RISCVISD::VMV_S_X_VL:
22659 case RISCVISD::VFMV_S_F_VL:
22660 // If this use only demands lane zero from the source vmv.s.x, and
22661 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
22662 // a vmv.v.x. Note that there can be other uses of the original
22663 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
22664 if (isNullConstant(Idx) && Passthru.isUndef() &&
22665 VL == Src.getOperand(2)) {
22666 unsigned Opc =
22667 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
22668 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
22669 VL);
22670 }
22671 break;
22672 }
22673 break;
22674 }
22675 case RISCVISD::TUPLE_EXTRACT: {
22676 EVT VT = N->getValueType(0);
22677 SDValue Tuple = N->getOperand(0);
22678 unsigned Idx = N->getConstantOperandVal(1);
22679 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
22680 break;
22681
22682 unsigned NF = 0;
22683 switch (Tuple.getConstantOperandVal(1)) {
22684 default:
22685 break;
22686 case Intrinsic::riscv_vlseg2_mask:
22687 case Intrinsic::riscv_vlseg3_mask:
22688 case Intrinsic::riscv_vlseg4_mask:
22689 case Intrinsic::riscv_vlseg5_mask:
22690 case Intrinsic::riscv_vlseg6_mask:
22691 case Intrinsic::riscv_vlseg7_mask:
22692 case Intrinsic::riscv_vlseg8_mask:
22693 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
22694 break;
22695 }
22696
22697 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
22698 break;
22699
22700 unsigned SEW = VT.getScalarSizeInBits();
22701 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
22702 "Type mismatch without bitcast?");
22703 unsigned Stride = SEW / 8 * NF;
22704 unsigned Offset = SEW / 8 * Idx;
22705
22706 SDValue Passthru = Tuple.getOperand(2);
22707 if (Passthru.isUndef())
22708 Passthru = DAG.getUNDEF(VT);
22709 else
22710 Passthru = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VT, Passthru,
22711 N->getOperand(1));
22712
22713 SDValue Ops[] = {
22714 /*Chain=*/Tuple.getOperand(0),
22715 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
22716 /*Passthru=*/Passthru,
22717 /*Ptr=*/
22718 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
22719 DAG.getConstant(Offset, DL, XLenVT)),
22720 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
22721 /*Mask=*/Tuple.getOperand(4),
22722 /*VL=*/Tuple.getOperand(5),
22723 /*Policy=*/Tuple.getOperand(6)};
22724
22725 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
22726 // Match getTgtMemIntrinsic for non-unit stride case
22727 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
22730 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
22731
22732 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
22734 Ops, MemVT, MMO);
22735 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
22736 return Result.getValue(0);
22737 }
22738 case RISCVISD::TUPLE_INSERT: {
22739 // tuple_insert tuple, undef, idx -> tuple
22740 if (N->getOperand(1).isUndef())
22741 return N->getOperand(0);
22742 break;
22743 }
22744 case RISCVISD::VMERGE_VL: {
22745 // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
22746 SDValue Mask = N->getOperand(0);
22747 SDValue True = N->getOperand(1);
22748 SDValue Passthru = N->getOperand(3);
22749 SDValue VL = N->getOperand(4);
22750
22751 // Fixed vectors are wrapped in scalable containers, unwrap them.
22752 using namespace SDPatternMatch;
22753 SDValue SubVec;
22754 if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
22755 Mask = SubVec;
22756
22757 if (!isOneOrOneSplat(Mask))
22758 break;
22759
22760 return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getValueType(0),
22761 Passthru, True, VL);
22762 }
22763 case RISCVISD::VMV_V_V_VL: {
22764 // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
22765 SDValue Passthru = N->getOperand(0);
22766 SDValue Src = N->getOperand(1);
22767 SDValue VL = N->getOperand(2);
22768
22769 // Fixed vectors are wrapped in scalable containers, unwrap them.
22770 using namespace SDPatternMatch;
22771 SDValue SubVec;
22772 if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
22773 Src = SubVec;
22774
22775 SDValue SplatVal = DAG.getSplatValue(Src, /*LegalTypes=*/true);
22776 if (!SplatVal)
22777 break;
22778 MVT VT = N->getSimpleValueType(0);
22779 return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
22780 Subtarget);
22781 }
22782 case RISCVISD::VSLIDEDOWN_VL:
22783 case RISCVISD::VSLIDEUP_VL:
22784 if (N->getOperand(1)->isUndef())
22785 return N->getOperand(0);
22786 break;
22787 case RISCVISD::VSLIDE1UP_VL:
22788 case RISCVISD::VFSLIDE1UP_VL: {
22789 using namespace SDPatternMatch;
22790 SDValue SrcVec;
22791 SDLoc DL(N);
22792 MVT VT = N->getSimpleValueType(0);
22793 // If the scalar we're sliding in was extracted from the first element of a
22794 // vector, we can use that vector as the passthru in a normal slideup of 1.
22795 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
22796 if (!N->getOperand(0).isUndef() ||
22797 !sd_match(N->getOperand(2),
22798 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
22799 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
22800 break;
22801
22802 MVT SrcVecVT = SrcVec.getSimpleValueType();
22803 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
22804 break;
22805 // Adapt the value type of source vector.
22806 if (SrcVecVT.isFixedLengthVector()) {
22807 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
22808 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
22809 }
22811 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
22812 else
22813 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
22814
22815 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
22816 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
22817 N->getOperand(4));
22818 }
22819 }
22820
22821 return SDValue();
22822}
22823
    EVT XVT, unsigned KeptBits) const {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  // Only scalar i32/i64 can be sign-extended with a single instruction below.
  if (XVT != MVT::i32 && XVT != MVT::i64)
    return false;

  // We can use sext.w for RV64 or an srai 31 on RV32.
  if (KeptBits == 32 || KeptBits == 64)
    return true;

  // With Zbb we can use sext.h/sext.b.
  // NOTE(review): the KeptBits == 8 arm pairs XVT == MVT::i64 with
  // !is64Bit() — confirm this combination is intentional.
  return Subtarget.hasStdExtZbb() &&
         ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
          KeptBits == 16);
}
22842
    const SDNode *N, CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();

  // LD/ST will optimize constant Offset extraction, so when AddNode is used by
  // LD/ST, it can still complete the folding optimization operation performed
  // above.
  auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
    for (SDNode *Use : X->users()) {
      // This use is the one we're on right now. Skip it
      if (Use == User || Use->getOpcode() == ISD::SELECT)
        continue;
        return false;
    }
    return true;
  };

  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    // A multi-use add is still worth commuting past when all of its other
    // users are loads/stores that can fold the resulting offset.
    if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
      return isUsedByLdSt(N0.getNode(), N);

    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));

    // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
    if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
        N->user_begin()->getOpcode() == ISD::ADD &&
        !isUsedByLdSt(*N->user_begin(), nullptr) &&
        !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
      return false;

    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getSignificantBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getSignificantBits() <= 64 &&
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost =
          RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
                                     /*CompressionCost*/ true);
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
          /*CompressionCost*/ true);

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }

  // Commuting through a multi-use operand would duplicate the inner op.
  if (!N0->hasOneUse())
    return false;

  if (N0->getOpcode() == ISD::SIGN_EXTEND &&
      N0->getOperand(0)->getOpcode() == ISD::ADD &&
      !N0->getOperand(0)->hasOneUse())
    return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());

  return true;
}
22929
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Opcode = Op.getOpcode();
  if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.

  APInt ExpandedMask = Mask | ~DemandedBits;

  // A legal replacement mask must keep every bit of ShrunkMask and may only
  // add bits that the caller does not demand.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  // Rebuild the logic op with NewMask and report the combine to TLO.
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // And has a few special cases for zext.
  if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff), if zext.h exists use zext.h,
    // otherwise use SLLI + SRLI.
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);

    // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
    if (VT == MVT::i64) {
      APInt NewMask = APInt(64, 0xffffffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getSignificantBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
23015
// Evaluate the generalized bit-reverse (GREV) or generalized OR-combine
// (GORC) butterfly network on \p x for control value \p ShAmt. Each set bit
// of ShAmt enables one stage that swaps (GREV) or ORs together (GORC)
// adjacent groups of 1, 2, 4, 8, 16 or 32 bits. ShAmt == 7 corresponds to
// brev8/orc.b.
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  // Per-stage masks selecting the low half of each bit group.
  static const uint64_t StageMasks[6] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  uint64_t Val = x;
  for (unsigned Idx = 0; Idx < 6; ++Idx) {
    // Stage Idx is active only when the corresponding control bit is set.
    if (!(ShAmt & (1u << Idx)))
      continue;
    const uint64_t M = StageMasks[Idx];
    const unsigned Sh = 1u << Idx;
    // Exchange the two halves of every 2*Sh-bit group.
    uint64_t Swapped = ((Val & M) << Sh) | ((Val >> Sh) & M);
    // GORC keeps the original bits as well, turning the swap into an OR.
    Val = IsGORC ? (Swapped | Val) : Swapped;
  }

  return Val;
}
23034
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  // Each case below fills in bits that are provably zero/one in the result of
  // the corresponding RISC-V-specific node.
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case RISCVISD::VCPOP_VL: {
    // The popcount result cannot exceed the VL operand's maximum value.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
    Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
    break;
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.
    Known.One.clearAllBits();
    break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SLLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // Only the low 5 bits of the shift amount are read.
    Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SRLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SRAW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SHL_ADD: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    unsigned ShAmt = Op.getConstantOperandVal(1);
    Known <<= ShAmt;
    Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
    Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLSW: {
    // The upper 32 bits are ignored by the instruction, but ComputeNumSignBits
    // doesn't give us a way to ignore them. If there are fewer than 33 sign
    // bits in the input consider it as having no redundant sign bits. Otherwise
    // the lower bound of the result is NumSignBits-33. The maximum value of
    // the result is 31.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
    unsigned MinRedundantSignBits = NumSignBits < 33 ? 0 : NumSignBits - 33;
    // Create a ConstantRange [MinRedundantSignBits, 32) and convert it to
    // KnownBits.
    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
                        APInt(BitWidth, 32));
    Known = Range.toKnownBits();
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // To compute zeros for ORC_B, we need to invert the value and invert it
    // back after. This inverting is harmless for BREV8.
    Known.Zero =
        ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
    // When min == max, VLENB is a compile-time constant power of two.
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case RISCVISD::FCLASS: {
    // fclass will only set one of the low 10 bits.
    Known.Zero.setBitsFrom(10);
    break;
  }
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      // The vsetvli result is bounded by VLMAX (and by a constant AVL when
      // one is present).
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      RISCVVType::VLMUL VLMUL =
          static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
      unsigned SEW = RISCVVType::decodeVSEW(VSEW);
      auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
      uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
      if (BitWidth > KnownZeroFirstBit)
        Known.Zero.setBitsFrom(KnownZeroFirstBit);
      break;
    }
    }
    break;
  }
  }
}
23224
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    // The result carries as many sign bits as both select arms guarantee.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ:
    // Output is either all zero or operand 0. We can propagate sign bit count
    // from operand 0.
    return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case RISCVISD::NEGW_MAX: {
    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp < 33) return 1;
    return 33;
  }
  case RISCVISD::SRAW: {
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // sraw produces at least 33 sign bits. If the input already has more than
    // 33 sign bits, sraw will preserve them.
    // TODO: A more precise answer could be calculated depending on known bits
    // in the shift amount.
    return std::max(Tmp, 33U);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::ABSW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct.
    return 33;
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg:
    case Intrinsic::riscv_masked_atomicrmw_add:
    case Intrinsic::riscv_masked_atomicrmw_sub:
    case Intrinsic::riscv_masked_atomicrmw_nand:
    case Intrinsic::riscv_masked_atomicrmw_max:
    case Intrinsic::riscv_masked_atomicrmw_min:
    case Intrinsic::riscv_masked_atomicrmw_umax:
    case Intrinsic::riscv_masked_atomicrmw_umin:
    case Intrinsic::riscv_masked_cmpxchg:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.hasStdExtZalrsc());
      return Op.getValueSizeInBits() - 31;
    }
    break;
  }
  }

  // Conservative default: at least the sign bit itself is known.
  return 1;
}
23316
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();

  switch (Op.getOpcode()) {
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    KnownBits Known2;
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // For BREV8, we need to do BREV8 on the demanded bits.
    // For ORC_B, any bit in the output demands all bits from the same byte.
    // So we need to do ORC_B on the demanded bits.
        APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
                                          7, IsGORC));
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
                             OriginalDemandedElts, Known2, TLO, Depth + 1))
      return true;

    // To compute zeros for ORC_B, we need to invert the value and invert it
    // back after. This inverting is harmless for BREV8.
    Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
    return false;
  }
  }

      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
23349
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW:
    // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
    // amount is in bounds.
    return false;
  case RISCVISD::SELECT_CC:
    // Integer comparisons cannot create poison.
    assert(Op.getOperand(0).getValueType().isInteger() &&
           "RISCVISD::SELECT_CC only compares integers");
    return false;
  }
      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}
23373
// Return the Constant a load reads, when the load is a normal (non-extending,
// non-indexed) load whose address is a zero-offset, non-machine constant-pool
// entry reached either through an LLA node or a HI/ADD_LO pair; otherwise
// return nullptr.
// NOTE(review): extraction dropped line 23375 (the
// RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *) declaration
// tail) from this view.
23374 const Constant *
23376 assert(Ld && "Unexpected null LoadSDNode");
23377 if (!ISD::isNormalLoad(Ld))
23378 return nullptr;
23379
23380 SDValue Ptr = Ld->getBasePtr();
23381
23382 // Only constant pools with no offset are supported.
23383 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
23384 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
23385 if (!CNode || CNode->isMachineConstantPoolEntry() ||
23386 CNode->getOffset() != 0)
23387 return nullptr;
23388
23389 return CNode;
23390 };
23391
23392 // Simple case, LLA.
23393 if (Ptr.getOpcode() == RISCVISD::LLA) {
23394 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
23395 if (!CNode || CNode->getTargetFlags() != 0)
23396 return nullptr;
23397
23398 return CNode->getConstVal();
23399 }
23400
23401 // Look for a HI and ADD_LO pair.
23402 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
23403 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
23404 return nullptr;
23405
23406 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1))
23407 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
23408
// Both halves must carry the matching %lo/%hi relocation flags and refer to
// the very same constant-pool entry.
23409 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
23410 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
23411 return nullptr;
23412
23413 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
23414 return nullptr;
23415
23416 return CNodeLo->getConstVal();
23417}
23418
// Custom inserter for RISCV::ReadCounterWide (RV32 only): expands the pseudo
// into the classic hi/lo/hi CSR read loop that retries if the high half
// changed between reads, splitting BB into a LoopMBB and a DoneMBB.
// NOTE(review): extraction dropped lines 23419 (declaration head), 23437
// (iterator 'It' definition), 23448 (successor/PHI transfer to DoneMBB),
// 23452 (RegInfo definition) and 23460 (TII definition) from this view.
23420 MachineBasicBlock *BB) {
23421 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
23422
23423 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
23424 // Should the count have wrapped while it was being read, we need to try
23425 // again.
23426 // For example:
23427 // ```
23428 // read:
23429 // csrrs x3, counterh # load high word of counter
23430 // csrrs x2, counter # load low word of counter
23431 // csrrs x4, counterh # load high word of counter
23432 // bne x3, x4, read # check if high word reads match, otherwise try again
23433 // ```
23434
23435 MachineFunction &MF = *BB->getParent();
23436 const BasicBlock *LLVMBB = BB->getBasicBlock();
23438
23439 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
23440 MF.insert(It, LoopMBB);
23441
23442 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
23443 MF.insert(It, DoneMBB);
23444
23445 // Transfer the remainder of BB and its successor edges to DoneMBB.
23446 DoneMBB->splice(DoneMBB->begin(), BB,
23447 std::next(MachineBasicBlock::iterator(MI)), BB->end());
23449
23450 BB->addSuccessor(LoopMBB);
23451
23453 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23454 Register LoReg = MI.getOperand(0).getReg();
23455 Register HiReg = MI.getOperand(1).getReg();
23456 int64_t LoCounter = MI.getOperand(2).getImm();
23457 int64_t HiCounter = MI.getOperand(3).getImm();
23458 DebugLoc DL = MI.getDebugLoc();
23459
// CSRRS with rs1 = x0 is a pure CSR read with no side effect on the CSR.
23461 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
23462 .addImm(HiCounter)
23463 .addReg(RISCV::X0);
23464 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
23465 .addImm(LoCounter)
23466 .addReg(RISCV::X0);
23467 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
23468 .addImm(HiCounter)
23469 .addReg(RISCV::X0);
23470
// Retry the whole read if the two high-half samples disagree (wrap detected).
23471 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
23472 .addReg(HiReg)
23473 .addReg(ReadAgainReg)
23474 .addMBB(LoopMBB);
23475
23476 LoopMBB->addSuccessor(LoopMBB);
23477 LoopMBB->addSuccessor(DoneMBB);
23478
23479 MI.eraseFromParent();
23480
23481 return DoneMBB;
23482}
23483
// Custom inserter for RISCV::SplitF64Pseudo: spills the f64 source register to
// a dedicated frame slot, then reloads it as two 32-bit GPR halves (LW at
// offsets 0 and 4, swapped for big-endian).
// NOTE(review): extraction dropped lines 23484-23485 (declaration head) and
// 23503-23505 (the MachineMemOperand construction, including MMOHi) from
// this view.
23486 const RISCVSubtarget &Subtarget) {
23487 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
23488
23489 MachineFunction &MF = *BB->getParent();
23490 DebugLoc DL = MI.getDebugLoc();
23491 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
23492 Register LoReg = MI.getOperand(0).getReg();
23493 Register HiReg = MI.getOperand(1).getReg();
23494 Register SrcReg = MI.getOperand(2).getReg();
23495
// The round-trip goes through a per-function scratch slot reserved for f64
// moves between the FP and integer register files.
23496 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
23497 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
23498
23499 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
23500 Register());
23502 MachineMemOperand *MMOLo =
23506
23507 // For big-endian, the high part is at offset 0 and the low part at offset 4.
23508 if (!Subtarget.isLittleEndian())
23509 std::swap(LoReg, HiReg);
23510
23511 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
23512 .addFrameIndex(FI)
23513 .addImm(0)
23514 .addMemOperand(MMOLo);
23515 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
23516 .addFrameIndex(FI)
23517 .addImm(4)
23518 .addMemOperand(MMOHi);
23519 MI.eraseFromParent(); // The pseudo instruction is gone now.
23520 return BB;
23521}
23522
// Custom inserter for RISCV::BuildPairF64Pseudo: the inverse of SplitF64 —
// stores the two 32-bit GPR halves to the f64 scratch slot (SW at offsets 0
// and 4, swapped for big-endian) and reloads the slot as one FPR64 value.
// NOTE(review): extraction dropped lines 23523-23524 (declaration head) and
// 23543-23545 (the MachineMemOperand construction, including MMOHi) from
// this view.
23525 const RISCVSubtarget &Subtarget) {
23526 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
23527 "Unexpected instruction");
23528
23529 MachineFunction &MF = *BB->getParent();
23530 DebugLoc DL = MI.getDebugLoc();
23531 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
23532 Register DstReg = MI.getOperand(0).getReg();
23533 Register LoReg = MI.getOperand(1).getReg();
23534 Register HiReg = MI.getOperand(2).getReg();
23535 bool KillLo = MI.getOperand(1).isKill();
23536 bool KillHi = MI.getOperand(2).isKill();
23537
23538 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
23539 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
23540
23542 MachineMemOperand *MMOLo =
23546
23547 // For big-endian, store the high part at offset 0 and the low part at
23548 // offset 4.
23549 if (!Subtarget.isLittleEndian()) {
23550 std::swap(LoReg, HiReg);
23551 std::swap(KillLo, KillHi);
23552 }
23553
23554 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
23555 .addReg(LoReg, getKillRegState(KillLo))
23556 .addFrameIndex(FI)
23557 .addImm(0)
23558 .addMemOperand(MMOLo);
23559 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
23560 .addReg(HiReg, getKillRegState(KillHi))
23561 .addFrameIndex(FI)
23562 .addImm(4)
23563 .addMemOperand(MMOHi);
23564 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
23565 MI.eraseFromParent(); // The pseudo instruction is gone now.
23566 return BB;
23567}
23568
// Custom inserter for the PseudoQuietFLE/FLT family: performs the (signaling)
// relational compare, then restores the pre-compare FFLAGS so the quiet
// semantics are preserved, and finally issues a dummy FEQ into x0 whose only
// purpose is to raise the invalid-operation exception for signaling NaNs.
// NOTE(review): extraction dropped lines 23569 (declaration head), 23576/23578
// (MRI and TII definitions) and 23586-23587/23597-23598 (the NoFPExcept-flag
// propagation onto MIB and MIB2) from this view.
23570 unsigned RelOpcode, unsigned EqOpcode,
23571 const RISCVSubtarget &Subtarget) {
23572 DebugLoc DL = MI.getDebugLoc();
23573 Register DstReg = MI.getOperand(0).getReg();
23574 Register Src1Reg = MI.getOperand(1).getReg();
23575 Register Src2Reg = MI.getOperand(2).getReg();
23577 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23579
23580 // Save the current FFLAGS.
23581 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
23582
23583 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
23584 .addReg(Src1Reg)
23585 .addReg(Src2Reg);
23588
23589 // Restore the FFLAGS.
23590 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
23591 .addReg(SavedFFlags, RegState::Kill);
23592
23593 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
23594 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
23595 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
23596 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
23599
23600 // Erase the pseudoinstruction.
23601 MI.eraseFromParent();
23602 return BB;
23603}
23604
// Lower two cascaded Select_FPRX_ pseudos (Second consumes First's result as
// its false value, with identical condition operands) into a single shared
// diamond of blocks with one three-input PHI, instead of two stacked
// triangles.
// NOTE(review): extraction dropped lines 23606 (the parameter list head
// naming First/Second) and 23658 (the splice start iterator) from this view.
23605 static MachineBasicBlock *
23607 MachineBasicBlock *ThisMBB,
23608 const RISCVSubtarget &Subtarget) {
23609 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
23610 // Without this, custom-inserter would have generated:
23611 //
23612 // A
23613 // | \
23614 // | B
23615 // | /
23616 // C
23617 // | \
23618 // | D
23619 // | /
23620 // E
23621 //
23622 // A: X = ...; Y = ...
23623 // B: empty
23624 // C: Z = PHI [X, A], [Y, B]
23625 // D: empty
23626 // E: PHI [X, C], [Z, D]
23627 //
23628 // If we lower both Select_FPRX_ in a single step, we can instead generate:
23629 //
23630 // A
23631 // | \
23632 // | C
23633 // | /|
23634 // |/ |
23635 // | |
23636 // | D
23637 // | /
23638 // E
23639 //
23640 // A: X = ...; Y = ...
23641 // D: empty
23642 // E: PHI [X, A], [X, C], [Y, D]
23643
23644 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
23645 const DebugLoc &DL = First.getDebugLoc();
23646 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
23647 MachineFunction *F = ThisMBB->getParent();
23648 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
23649 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
23650 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
23651 MachineFunction::iterator It = ++ThisMBB->getIterator();
23652 F->insert(It, FirstMBB);
23653 F->insert(It, SecondMBB);
23654 F->insert(It, SinkMBB);
23655
23656 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
23657 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
23659 ThisMBB->end());
23660 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
23661
23662 // Fallthrough block for ThisMBB.
23663 ThisMBB->addSuccessor(FirstMBB);
23664 // Fallthrough block for FirstMBB.
23665 FirstMBB->addSuccessor(SecondMBB);
23666 ThisMBB->addSuccessor(SinkMBB);
23667 FirstMBB->addSuccessor(SinkMBB);
23668 // This is fallthrough.
23669 SecondMBB->addSuccessor(SinkMBB);
23670
23671 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
23672 Register FLHS = First.getOperand(1).getReg();
23673 Register FRHS = First.getOperand(2).getReg();
23674 // Insert appropriate branch.
23675 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
23676 .addReg(FLHS)
23677 .addReg(FRHS)
23678 .addMBB(SinkMBB);
23679
23680 Register SLHS = Second.getOperand(1).getReg();
23681 Register SRHS = Second.getOperand(2).getReg();
23682 Register Op1Reg4 = First.getOperand(4).getReg();
23683 Register Op1Reg5 = First.getOperand(5).getReg();
23684
23685 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
23686 // Insert appropriate branch.
23687 BuildMI(ThisMBB, DL,
23688 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
23689 .addReg(SLHS)
23690 .addReg(SRHS)
23691 .addMBB(SinkMBB);
23692
// The single merged PHI: Second's true value arrives from ThisMBB, First's
// true value from FirstMBB, and First's false value from SecondMBB.
23693 Register DestReg = Second.getOperand(0).getReg();
23694 Register Op2Reg4 = Second.getOperand(4).getReg();
23695 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
23696 .addReg(Op2Reg4)
23697 .addMBB(ThisMBB)
23698 .addReg(Op1Reg4)
23699 .addMBB(FirstMBB)
23700 .addReg(Op1Reg5)
23701 .addMBB(SecondMBB);
23702
23703 // Now remove the Select_FPRX_s.
23704 First.eraseFromParent();
23705 Second.eraseFromParent();
23706 return SinkMBB;
23707}
23708
// Custom inserter for the Select_* pseudos: materializes the select as a
// branch triangle (HeadMBB -> IfFalseMBB -> TailMBB), batching a run of
// selects that share the exact same condition into one triangle with one PHI
// per select. Cascaded FP selects are diverted to EmitLoweredCascadedSelect.
// NOTE(review): extraction dropped lines 23709-23710 (the emitSelectPseudo
// declaration head) and 23795 (the insertion iterator 'I' definition) from
// this view.
23711 const RISCVSubtarget &Subtarget) {
23712 // To "insert" Select_* instructions, we actually have to insert the triangle
23713 // control-flow pattern. The incoming instructions know the destination vreg
23714 // to set, the condition code register to branch on, the true/false values to
23715 // select between, and the condcode to use to select the appropriate branch.
23716 //
23717 // We produce the following control flow:
23718 // HeadMBB
23719 // | \
23720 // | IfFalseMBB
23721 // | /
23722 // TailMBB
23723 //
23724 // When we find a sequence of selects we attempt to optimize their emission
23725 // by sharing the control flow. Currently we only handle cases where we have
23726 // multiple selects with the exact same condition (same LHS, RHS and CC).
23727 // The selects may be interleaved with other instructions if the other
23728 // instructions meet some requirements we deem safe:
23729 // - They are not pseudo instructions.
23730 // - They are debug instructions. Otherwise,
23731 // - They do not have side-effects, do not access memory and their inputs do
23732 // not depend on the results of the select pseudo-instructions.
23733 // - They don't adjust stack.
23734 // The TrueV/FalseV operands of the selects cannot depend on the result of
23735 // previous selects in the sequence.
23736 // These conditions could be further relaxed. See the X86 target for a
23737 // related approach and more information.
23738 //
23739 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
23740 // is checked here and handled by a separate function -
23741 // EmitLoweredCascadedSelect.
23742
23743 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
23744 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
23745 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
23746 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
23747 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
23748 Next->getOperand(5).isKill())
23749 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
23750
23751 Register LHS = MI.getOperand(1).getReg();
23752 Register RHS;
23753 if (MI.getOperand(2).isReg())
23754 RHS = MI.getOperand(2).getReg();
23755 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
23756
23757 SmallVector<MachineInstr *, 4> SelectDebugValues;
23758 SmallSet<Register, 4> SelectDests;
23759 SelectDests.insert(MI.getOperand(0).getReg());
23760
23761 MachineInstr *LastSelectPseudo = &MI;
23762 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
23763
// Scan forward for further selects with the identical condition, stopping at
// the first instruction that is unsafe to hoist past or that consumes a
// prior select's result.
23764 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
23765 SequenceMBBI != E; ++SequenceMBBI) {
23766 if (SequenceMBBI->isDebugInstr())
23767 continue;
23768 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
23769 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
23770 !SequenceMBBI->getOperand(2).isReg() ||
23771 SequenceMBBI->getOperand(2).getReg() != RHS ||
23772 SequenceMBBI->getOperand(3).getImm() != CC ||
23773 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
23774 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
23775 break;
23776 LastSelectPseudo = &*SequenceMBBI;
23777 SequenceMBBI->collectDebugValues(SelectDebugValues);
23778 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
23779 continue;
23780 }
23781 if (SequenceMBBI->hasUnmodeledSideEffects() ||
23782 SequenceMBBI->mayLoadOrStore() ||
23783 SequenceMBBI->usesCustomInsertionHook() ||
23784 TII.isFrameInstr(*SequenceMBBI) ||
23785 SequenceMBBI->isStackAligningInlineAsm())
23786 break;
23787 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
23788 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
23789 }))
23790 break;
23791 }
23792
23793 const BasicBlock *LLVM_BB = BB->getBasicBlock();
23794 DebugLoc DL = MI.getDebugLoc();
23796
23797 MachineBasicBlock *HeadMBB = BB;
23798 MachineFunction *F = BB->getParent();
23799 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
23800 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
23801
23802 F->insert(I, IfFalseMBB);
23803 F->insert(I, TailMBB);
23804
23805 // Set the call frame size on entry to the new basic blocks.
23806 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
23807 IfFalseMBB->setCallFrameSize(CallFrameSize);
23808 TailMBB->setCallFrameSize(CallFrameSize);
23809
23810 // Transfer debug instructions associated with the selects to TailMBB.
23811 for (MachineInstr *DebugInstr : SelectDebugValues) {
23812 TailMBB->push_back(DebugInstr->removeFromParent());
23813 }
23814
23815 // Move all instructions after the sequence to TailMBB.
23816 TailMBB->splice(TailMBB->end(), HeadMBB,
23817 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
23818 // Update machine-CFG edges by transferring all successors of the current
23819 // block to the new block which will contain the Phi nodes for the selects.
23820 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
23821 // Set the successors for HeadMBB.
23822 HeadMBB->addSuccessor(IfFalseMBB);
23823 HeadMBB->addSuccessor(TailMBB);
23824
23825 // Insert appropriate branch.
23826 if (MI.getOperand(2).isImm())
23827 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
23828 .addReg(LHS)
23829 .addImm(MI.getOperand(2).getImm())
23830 .addMBB(TailMBB);
23831 else
23832 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
23833 .addReg(LHS)
23834 .addReg(RHS)
23835 .addMBB(TailMBB);
23836
23837 // IfFalseMBB just falls through to TailMBB.
23838 IfFalseMBB->addSuccessor(TailMBB);
23839
23840 // Create PHIs for all of the select pseudo-instructions.
23841 auto SelectMBBI = MI.getIterator();
23842 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
23843 auto InsertionPoint = TailMBB->begin();
23844 while (SelectMBBI != SelectEnd) {
23845 auto Next = std::next(SelectMBBI);
23846 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
23847 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
23848 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
23849 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
23850 .addReg(SelectMBBI->getOperand(4).getReg())
23851 .addMBB(HeadMBB)
23852 .addReg(SelectMBBI->getOperand(5).getReg())
23853 .addMBB(IfFalseMBB)
23854 SelectMBBI->eraseFromParent();
23855 }
23856 SelectMBBI = Next;
23857 }
23858
// PHIs were introduced, so the function no longer satisfies the NoPHIs
// property.
23859 F->getProperties().resetNoPHIs();
23860 return TailMBB;
23861}
23862
23863// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
// Asserts (rather than returning null) if either table lookup fails, since
// callers pass combinations that must exist.
// NOTE(review): extraction dropped lines 23866 and 23869 (the declarations of
// the 'Inverse' and 'Masked' lookup results) from this view.
23864 static const RISCV::RISCVMaskedPseudoInfo *
23865 lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
23867 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
23868 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
23870 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
23871 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
23872 return Masked;
23873}
23874
// Custom inserter for PseudoVFROUND_NOEXCEPT_V_*_MASK: rounds by converting
// to integer (CVTXOpc) and back (masked VFCVT_F_X), bracketed by an FFLAGS
// save/restore so no exception flags leak from the conversions.
// NOTE(review): extraction dropped lines 23875-23876 (declaration head),
// 23880/23882 (TII and MRI definitions) and 23892 (TRI initializer tail)
// from this view.
23877 unsigned CVTXOpc) {
23878 DebugLoc DL = MI.getDebugLoc();
23879
23881
23883 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23884
23885 // Save the old value of FFLAGS.
23886 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
23887
23888 assert(MI.getNumOperands() == 7);
23889
23890 // Emit a VFCVT_X_F
23891 const TargetRegisterInfo *TRI =
23893 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
23894 Register Tmp = MRI.createVirtualRegister(RC);
23895 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
23896 .add(MI.getOperand(1))
23897 .add(MI.getOperand(2))
23898 .add(MI.getOperand(3))
23899 .add(MachineOperand::CreateImm(7)) // frm = DYN
23900 .add(MI.getOperand(4))
23901 .add(MI.getOperand(5))
23902 .add(MI.getOperand(6))
23903 .add(MachineOperand::CreateReg(RISCV::FRM,
23904 /*IsDef*/ false,
23905 /*IsImp*/ true));
23906
23907 // Emit a VFCVT_F_X
23908 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
23909 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
23910 // There is no E8 variant for VFCVT_F_X.
23911 assert(Log2SEW >= 4);
23912 unsigned CVTFOpc =
23913 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
23914 ->MaskedPseudo;
23915
23916 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
23917 .add(MI.getOperand(0))
23918 .add(MI.getOperand(1))
23919 .addReg(Tmp)
23920 .add(MI.getOperand(3))
23921 .add(MachineOperand::CreateImm(7)) // frm = DYN
23922 .add(MI.getOperand(4))
23923 .add(MI.getOperand(5))
23924 .add(MI.getOperand(6))
23925 .add(MachineOperand::CreateReg(RISCV::FRM,
23926 /*IsDef*/ false,
23927 /*IsImp*/ true));
23928
23929 // Restore FFLAGS.
23930 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
23931 .addReg(SavedFFLAGS, RegState::Kill);
23932
23933 // Erase the pseudoinstruction.
23934 MI.eraseFromParent();
23935 return BB;
23936}
23937
// Custom inserter for the PseudoFROUND_* family: implements round-to-integer
// in FP registers. If |src| >= the magic max value (all values past which are
// already integral, and NaNs) the source is passed through unchanged;
// otherwise the value is converted FP->int->FP with the requested rounding
// mode and the original sign bit is restored (preserves signed zero).
// NOTE(review): extraction dropped lines 23938 (declaration head), 24012
// (successor/PHI transfer to DoneMBB) and 24032-24033/24046-24047/24052-24053
// (the NoFPExcept-flag propagation on the three MIB uses) from this view.
23939 const RISCVSubtarget &Subtarget) {
23940 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
23941 const TargetRegisterClass *RC;
23942 switch (MI.getOpcode()) {
23943 default:
23944 llvm_unreachable("Unexpected opcode");
23945 case RISCV::PseudoFROUND_H:
23946 CmpOpc = RISCV::FLT_H;
23947 F2IOpc = RISCV::FCVT_W_H;
23948 I2FOpc = RISCV::FCVT_H_W;
23949 FSGNJOpc = RISCV::FSGNJ_H;
23950 FSGNJXOpc = RISCV::FSGNJX_H;
23951 RC = &RISCV::FPR16RegClass;
23952 break;
23953 case RISCV::PseudoFROUND_H_INX:
23954 CmpOpc = RISCV::FLT_H_INX;
23955 F2IOpc = RISCV::FCVT_W_H_INX;
23956 I2FOpc = RISCV::FCVT_H_W_INX;
23957 FSGNJOpc = RISCV::FSGNJ_H_INX;
23958 FSGNJXOpc = RISCV::FSGNJX_H_INX;
23959 RC = &RISCV::GPRF16RegClass;
23960 break;
23961 case RISCV::PseudoFROUND_S:
23962 CmpOpc = RISCV::FLT_S;
23963 F2IOpc = RISCV::FCVT_W_S;
23964 I2FOpc = RISCV::FCVT_S_W;
23965 FSGNJOpc = RISCV::FSGNJ_S;
23966 FSGNJXOpc = RISCV::FSGNJX_S;
23967 RC = &RISCV::FPR32RegClass;
23968 break;
23969 case RISCV::PseudoFROUND_S_INX:
23970 CmpOpc = RISCV::FLT_S_INX;
23971 F2IOpc = RISCV::FCVT_W_S_INX;
23972 I2FOpc = RISCV::FCVT_S_W_INX;
23973 FSGNJOpc = RISCV::FSGNJ_S_INX;
23974 FSGNJXOpc = RISCV::FSGNJX_S_INX;
23975 RC = &RISCV::GPRF32RegClass;
23976 break;
23977 case RISCV::PseudoFROUND_D:
23978 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23979 CmpOpc = RISCV::FLT_D;
23980 F2IOpc = RISCV::FCVT_L_D;
23981 I2FOpc = RISCV::FCVT_D_L;
23982 FSGNJOpc = RISCV::FSGNJ_D;
23983 FSGNJXOpc = RISCV::FSGNJX_D;
23984 RC = &RISCV::FPR64RegClass;
23985 break;
23986 case RISCV::PseudoFROUND_D_INX:
23987 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23988 CmpOpc = RISCV::FLT_D_INX;
23989 F2IOpc = RISCV::FCVT_L_D_INX;
23990 I2FOpc = RISCV::FCVT_D_L_INX;
23991 FSGNJOpc = RISCV::FSGNJ_D_INX;
23992 FSGNJXOpc = RISCV::FSGNJX_D_INX;
23993 RC = &RISCV::GPRRegClass;
23994 break;
23995 }
23996
23997 const BasicBlock *BB = MBB->getBasicBlock();
23998 DebugLoc DL = MI.getDebugLoc();
23999 MachineFunction::iterator I = ++MBB->getIterator();
24000
24001 MachineFunction *F = MBB->getParent();
24002 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
24003 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
24004
24005 F->insert(I, CvtMBB);
24006 F->insert(I, DoneMBB);
24007 // Move all instructions after the sequence to DoneMBB.
24008 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
24009 MBB->end());
24010 // Update machine-CFG edges by transferring all successors of the current
24011 // block to the new block which will contain the Phi nodes for the selects.
24013 // Set the successors for MBB.
24014 MBB->addSuccessor(CvtMBB);
24015 MBB->addSuccessor(DoneMBB);
24016
24017 Register DstReg = MI.getOperand(0).getReg();
24018 Register SrcReg = MI.getOperand(1).getReg();
24019 Register MaxReg = MI.getOperand(2).getReg();
24020 int64_t FRM = MI.getOperand(3).getImm();
24021
24022 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
24023 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
24024
// FSGNJX of a value with itself computes |src|.
24025 Register FabsReg = MRI.createVirtualRegister(RC);
24026 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
24027
24028 // Compare the FP value to the max value.
24029 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
24030 auto MIB =
24031 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
24034
24035 // Insert branch.
24036 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
24037 .addReg(CmpReg)
24038 .addReg(RISCV::X0)
24039 .addMBB(DoneMBB);
24040
24041 CvtMBB->addSuccessor(DoneMBB);
24042
24043 // Convert to integer.
24044 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
24045 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
24048
24049 // Convert back to FP.
24050 Register I2FReg = MRI.createVirtualRegister(RC);
24051 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
24054
24055 // Restore the sign bit.
24056 Register CvtReg = MRI.createVirtualRegister(RC);
24057 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
24058
24059 // Merge the results.
24060 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
24061 .addReg(SrcReg)
24062 .addMBB(MBB)
24063 .addReg(CvtReg)
24064 .addMBB(CvtMBB);
24065
24066 MI.eraseFromParent();
24067 return DoneMBB;
24068}
24069
// Dispatcher for all RISC-V pseudos flagged usesCustomInserter: routes each
// opcode to its dedicated expansion helper (counter reads, selects, F64
// pack/unpack, quiet FP compares, masked VFROUND, FROUND, probed alloca) and
// handles STACKMAP/PATCHPOINT/STATEPOINT via the generic emitPatchPoint.
// NOTE(review): extraction dropped lines 24070-24071 (the
// RISCVTargetLowering::EmitInstrWithCustomInserter declaration head) and
// 24161 (the MachineOperand::CreateReg( call head for the X1 implicit def)
// from this view.
24072 MachineBasicBlock *BB) const {
24073 switch (MI.getOpcode()) {
24074 default:
24075 llvm_unreachable("Unexpected instr type to insert");
24076 case RISCV::ReadCounterWide:
24077 assert(!Subtarget.is64Bit() &&
24078 "ReadCounterWide is only to be used on riscv32");
24079 return emitReadCounterWidePseudo(MI, BB);
24080 case RISCV::Select_GPR_Using_CC_GPR:
24081 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
24082 case RISCV::Select_GPR_Using_CC_SImm5_CV:
24083 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
24084 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
24085 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
24086 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
24087 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
24088 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
24089 case RISCV::Select_FPR16_Using_CC_GPR:
24090 case RISCV::Select_FPR16INX_Using_CC_GPR:
24091 case RISCV::Select_FPR32_Using_CC_GPR:
24092 case RISCV::Select_FPR32INX_Using_CC_GPR:
24093 case RISCV::Select_FPR64_Using_CC_GPR:
24094 case RISCV::Select_FPR64INX_Using_CC_GPR:
24095 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
24096 return emitSelectPseudo(MI, BB, Subtarget);
24097 case RISCV::BuildPairF64Pseudo:
24098 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
24099 case RISCV::SplitF64Pseudo:
24100 return emitSplitF64Pseudo(MI, BB, Subtarget);
24101 case RISCV::PseudoQuietFLE_H:
24102 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
24103 case RISCV::PseudoQuietFLE_H_INX:
24104 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
24105 case RISCV::PseudoQuietFLT_H:
24106 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
24107 case RISCV::PseudoQuietFLT_H_INX:
24108 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
24109 case RISCV::PseudoQuietFLE_S:
24110 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
24111 case RISCV::PseudoQuietFLE_S_INX:
24112 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
24113 case RISCV::PseudoQuietFLT_S:
24114 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
24115 case RISCV::PseudoQuietFLT_S_INX:
24116 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
24117 case RISCV::PseudoQuietFLE_D:
24118 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
24119 case RISCV::PseudoQuietFLE_D_INX:
24120 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
24121 case RISCV::PseudoQuietFLE_D_IN32X:
24122 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
24123 Subtarget);
24124 case RISCV::PseudoQuietFLT_D:
24125 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
24126 case RISCV::PseudoQuietFLT_D_INX:
24127 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
24128 case RISCV::PseudoQuietFLT_D_IN32X:
24129 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
24130 Subtarget);
24131
24132 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
24133 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
24134 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
24135 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
24136 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
24137 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
24138 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
24139 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
24140 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
24141 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
24142 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
24143 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
24144 case RISCV::PseudoFROUND_H:
24145 case RISCV::PseudoFROUND_H_INX:
24146 case RISCV::PseudoFROUND_S:
24147 case RISCV::PseudoFROUND_S_INX:
24148 case RISCV::PseudoFROUND_D:
24149 case RISCV::PseudoFROUND_D_INX:
24150 case RISCV::PseudoFROUND_D_IN32X:
24151 return emitFROUND(MI, BB, Subtarget);
24152 case RISCV::PROBED_STACKALLOC_DYN:
24153 return emitDynamicProbedAlloc(MI, BB);
24154 case TargetOpcode::STATEPOINT:
24155 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
24156 // while jal call instruction (where statepoint will be lowered at the end)
24157 // has implicit def. This def is early-clobber as it will be set at
24158 // the moment of the call and earlier than any use is read.
24159 // Add this implicit dead def here as a workaround.
24160 MI.addOperand(*MI.getMF(),
24162 RISCV::X1, /*isDef*/ true,
24163 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
24164 /*isUndef*/ false, /*isEarlyClobber*/ true));
24165 [[fallthrough]];
24166 case TargetOpcode::STACKMAP:
24167 case TargetOpcode::PATCHPOINT:
24168 if (!Subtarget.is64Bit())
24169 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
24170 "supported on 64-bit targets");
24171 return emitPatchPoint(MI, BB);
24172 }
24173}
24174
// Post-isel hook: models the FRM control register as data. A def of FRM is
// forced live (non-dead), and any instruction whose rounding-mode operand is
// DYN gets an implicit FRM read so the scheduler cannot move it across FRM
// writes.
// NOTE(review): extraction dropped line 24175 (the
// RISCVTargetLowering::AdjustInstrPostInstrSelection declaration head) from
// this view.
24176 SDNode *Node) const {
24177 // If instruction defines FRM operand, conservatively set it as non-dead to
24178 // express data dependency with FRM users and prevent incorrect instruction
24179 // reordering.
24180 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
24181 FRMDef->setIsDead(false);
24182 return;
24183 }
24184 // Add FRM dependency to any instructions with dynamic rounding mode.
24185 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
24186 if (Idx < 0) {
24187 // Vector pseudos have FRM index indicated by TSFlags.
24188 Idx = RISCVII::getFRMOpNum(MI.getDesc());
24189 if (Idx < 0)
24190 return;
24191 }
24192 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
24193 return;
24194 // If the instruction already reads FRM, don't add another read.
24195 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
24196 return;
24197 MI.addOperand(
24198 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
24199}
24200
24201void RISCVTargetLowering::analyzeInputArgs(
24202 MachineFunction &MF, CCState &CCInfo,
24203 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
24204 RISCVCCAssignFn Fn) const {
24205 for (const auto &[Idx, In] : enumerate(Ins)) {
24206 MVT ArgVT = In.VT;
24207 ISD::ArgFlagsTy ArgFlags = In.Flags;
24208
24209 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
24210 In.OrigTy)) {
24211 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
24212 << ArgVT << '\n');
24213 llvm_unreachable(nullptr);
24214 }
24215 }
24216}
24217
24218void RISCVTargetLowering::analyzeOutputArgs(
24219 MachineFunction &MF, CCState &CCInfo,
24220 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
24221 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
24222 for (const auto &[Idx, Out] : enumerate(Outs)) {
24223 MVT ArgVT = Out.VT;
24224 ISD::ArgFlagsTy ArgFlags = Out.Flags;
24225
24226 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
24227 Out.OrigTy)) {
24228 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
24229 << ArgVT << "\n");
24230 llvm_unreachable(nullptr);
24231 }
24232 }
24233}
24234
24235 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
24236 // values.
// Handles the custom cases (f16/bf16 passed in an integer register, f32
// passed in the low half of an RV64 i64, fixed-length vectors passed as
// scalable containers) and the standard Full/BCvt location conversions.
// NOTE(review): extraction dropped lines 24237 (the static
// convertLocVTToValVT(SelectionDAG &, SDValue, ...) declaration head) and
// 24246 (the fixed-vs-scalable vector condition guarding the
// convertFromScalableVector call) from this view.
24238 const CCValAssign &VA, const SDLoc &DL,
24239 const RISCVSubtarget &Subtarget) {
24240 if (VA.needsCustom()) {
24241 if (VA.getLocVT().isInteger() &&
24242 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
24243 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
24244 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
24245 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
24247 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
24248 llvm_unreachable("Unexpected Custom handling.");
24249 }
24250
24251 switch (VA.getLocInfo()) {
24252 default:
24253 llvm_unreachable("Unexpected CCValAssign::LocInfo");
24254 case CCValAssign::Full:
24255 break;
24256 case CCValAssign::BCvt:
24257 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
24258 break;
24259 }
24260 return Val;
24261}
24262
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// Copies an incoming argument out of its physical register into a fresh
// virtual register and converts it back to its value type.
                               const CCValAssign &VA, const SDLoc &DL,
                               const ISD::InputArg &In,
                               const RISCVTargetLowering &TLI) {
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  // Make the physreg live-in and read it through a vreg copy.
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  // NOTE(review): guard not visible in this excerpt — presumably Indirect
  // locations return the raw pointer value unconverted; confirm upstream.
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
24297
// Convert Val from its natural value type into the register-level
// representation (loc VT) chosen by the calling convention. Inverse of
// convertLocVTToValVT.
                               const CCValAssign &VA, const SDLoc &DL,
                               const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  if (VA.needsCustom()) {
    // f16/bf16 passed in an integer register: move the FP bits into a GPR,
    // any-extending the upper bits.
    if (LocVT.isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
    // f32 passed in an i64 register on RV64.
    if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
      return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    // Fixed-length vector passed in a scalable vector container register.
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      return convertToScalableVector(LocVT, Val, DAG, Subtarget);
    llvm_unreachable("Unexpected Custom handling.");
  }

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // Same bit pattern, different type (e.g. f32 -> i32).
    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
24325
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// Loads an incoming stack-passed argument from its fixed frame slot and
// converts it back to its value type.
                               const CCValAssign &VA, const SDLoc &DL,
                               const RISCVTargetLowering &TLI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  // Immutable fixed object: caller-owned incoming-argument stack slot.
  int FI = MFI.CreateFixedObject(LocVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val = DAG.getLoad(
      LocVT, DL, Chain, FIN,

  // NOTE(review): guard not visible in this excerpt — presumably Indirect
  // locations return the loaded pointer unconverted; confirm upstream.
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
24347
// Reassemble an incoming f64 argument that the RV32 soft-float ABI split
// into two 32-bit halves: the low half is always in a GPR (VA); the high
// half (HiVA) is either in a second GPR or spilled to the stack.
                                      const CCValAssign &VA,
                                      const CCValAssign &HiVA,
                                      const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  // Low half: copy out of the incoming GPR through a fresh vreg.
  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }

  // For big-endian, swap the order of Lo and Hi when building the pair.
  const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
  if (!Subtarget.isLittleEndian())
    std::swap(Lo, Hi);

  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
24385
// Transform physical registers into virtual registers.
// Lowers the incoming arguments of the current function: validates the
// calling convention and the "interrupt" attribute, assigns a location to
// each formal argument, materializes each one (register copy, stack load,
// or split-f64 reassembly), and sets up the varargs save area when needed.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {


  switch (CallConv) {
  default:
    reportFatalUsageError("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::GRAAL:
#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
    CC_VLS_CASE(32)
    CC_VLS_CASE(64)
    CC_VLS_CASE(128)
    CC_VLS_CASE(256)
    CC_VLS_CASE(512)
    CC_VLS_CASE(1024)
    CC_VLS_CASE(2048)
    CC_VLS_CASE(4096)
    CC_VLS_CASE(8192)
    CC_VLS_CASE(16384)
    CC_VLS_CASE(32768)
    CC_VLS_CASE(65536)
#undef CC_VLS_CASE
    break;
  case CallingConv::GHC:
    // GHC requires full F/D (or Zfinx/Zdinx) support and is incompatible
    // with the reduced RVE register file.
    if (Subtarget.hasStdExtE())
      reportFatalUsageError("GHC calling convention is not supported on RVE!");
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
                            "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    // Interrupt handlers take no arguments and only accept a fixed set of
    // kind strings; vendor-specific kinds additionally require their
    // extension to be enabled.
    if (!Func.arg_empty())
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    constexpr StringLiteral SupportedInterruptKinds[] = {
        "machine",
        "supervisor",
        "rnmi",
        "qci-nest",
        "qci-nonest",
        "SiFive-CLIC-preemptible",
        "SiFive-CLIC-stack-swap",
        "SiFive-CLIC-preemptible-stack-swap",
    };
    if (!llvm::is_contained(SupportedInterruptKinds, Kind))
        "Function interrupt attribute argument not supported!");

    if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
        "'qci-*' interrupt kinds require Xqciint extension");

    if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
        "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");

    if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
      reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
    const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
    if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
      reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
                            "have a frame pointer");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                         : CC_RISCV);

  // InsIdx tracks Ins entries; i tracks ArgLocs entries. They diverge when
  // one Ins entry consumes two locations (split f64) or when extra parts of
  // an indirect argument are consumed inside the loop body.
  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL, *this);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        // Scalable-vector part offsets are in vscale units.
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
    // Remember incoming byval arguments so tail calls can reuse/compare
    // them (see isEligibleForTailCallOptimization).
    if (Ins[InsIdx].Flags.isByVal())
      RVFI->addIncomingByValArgs(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Size of the vararg save area. For now, the varargs save area is either
    // zero or large enough to hold a0-a7.
    int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
    int FI;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (VarArgsSaveSize == 0) {
      int VaArgOffset = CCInfo.getStackSize();
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    } else {
      int VaArgOffset = -VarArgsSaveSize;
      FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);

      // If saving an odd number of registers then create an extra stack slot to
      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
      // offsets to even-numbered registered remain 2*XLEN-aligned.
      if (Idx % 2) {
            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
        VarArgsSaveSize += XLenInBytes;
      }

      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Copy the integer registers that may have been used for passing varargs
      // to the vararg save area.
      for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        const Register Reg = RegInfo.createVirtualRegister(RC);
        RegInfo.addLiveIn(ArgRegs[I], Reg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
        SDValue Store = DAG.getStore(
            Chain, DL, ArgValue, FIN,
            MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
        OutChains.push_back(Store);
        FIN =
            DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
      }
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    RVFI->setVarArgsFrameIndex(FI);
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  RVFI->setArgumentStackSize(CCInfo.getStackSize());

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
24594
24595/// isEligibleForTailCallOptimization - Check whether the call is eligible
24596/// for tail call optimization.
24597/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
24598bool RISCVTargetLowering::isEligibleForTailCallOptimization(
24599 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
24600 const SmallVector<CCValAssign, 16> &ArgLocs) const {
24601
24602 auto CalleeCC = CLI.CallConv;
24603 auto &Outs = CLI.Outs;
24604 auto &Caller = MF.getFunction();
24605 auto CallerCC = Caller.getCallingConv();
24606 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
24607
24608 // Exception-handling functions need a special set of instructions to
24609 // indicate a return to the hardware. Tail-calling another function would
24610 // probably break this.
24611 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
24612 // should be expanded as new function attributes are introduced.
24613 if (Caller.hasFnAttribute("interrupt"))
24614 return false;
24615
24616 // If the stack arguments for this call do not fit into our own save area then
24617 // the call cannot be made tail.
24618 if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
24619 return false;
24620
24621 // Do not tail call opt if either caller or callee uses struct return
24622 // semantics.
24623 auto IsCallerStructRet = Caller.hasStructRetAttr();
24624 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
24625 if (IsCallerStructRet != IsCalleeStructRet)
24626 return false;
24627
24628 // Do not tail call opt if caller's and callee's byval arguments do not match.
24629 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
24630 if (!Outs[i].Flags.isByVal())
24631 continue;
24632 if (j++ >= RVFI->getIncomingByValArgsSize())
24633 return false;
24634 if (RVFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
24635 return false;
24636 }
24637
24638 // The callee has to preserve all registers the caller needs to preserve.
24639 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
24640 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
24641 if (CalleeCC != CallerCC) {
24642 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
24643 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
24644 return false;
24645 }
24646
24647 // If the callee takes no arguments then go on to check the results of the
24648 // call.
24649 const MachineRegisterInfo &MRI = MF.getRegInfo();
24650 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
24651 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
24652 return false;
24653
24654 return true;
24655}
24656
  // Preferred alignment of VT's corresponding IR type, per the module's
  // data layout.
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
24661
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
// Handles tail-call eligibility, byval copies, RV32 soft-float f64
// splitting, indirect (split/by-reference) arguments, register/stack
// argument placement, and copying the results back out of physregs.
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  const CallBase *CB = CLI.CB;


  // Set type id for call site info.
  setTypeIdForCallsiteInfo(CB, MF, CSInfo);

  // Analyze the operands of the call, assigning locations to each operand.
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC) {
    if (Subtarget.hasStdExtE())
      reportFatalUsageError("GHC calling convention is not supported on RVE!");
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  } else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                          : CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    reportFatalInternalError("failed to perform tail call elimination on a "
                             "call site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
    SDValue Dst;

    if (IsTailCall) {
      // Tail call: reuse the caller's own incoming byval slot ('j' is the
      // byval ordinal) instead of allocating a new stack object.
      SDValue CallerArg = RVFI->getIncomingByValArgs(j++);
      Dst = CallerArg;
    } else {
      int FI =
          MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
      Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    }
    if (Dst) {
      Chain =
          DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
                        /*IsVolatile=*/false,
                        /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
      ByValArgs.push_back(Dst);
    }
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // During a tail call, stores to the argument area must happen after all of
  // the function's incoming arguments have been loaded because they may alias.
  // This is done by folding in a TokenFactor from LowerFormalArguments, but
  // there's no point in doing so repeatedly so this tracks whether that's
  // happened yet.
  bool AfterFormalArgLoads = false;

  // Copy argument values to their designated locations.
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // 'j' indexes the ByValArgs copies created above; 'OutIdx' tracks Outs
  // entries while 'i' tracks ArgLocs entries (they diverge on split args).
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      // For big-endian, swap the order of Lo and Hi when passing.
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(DAG.getStore(
            Chain, DL, Hi, Address,
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        // Scalable-vector part offsets are in vscale units.
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
      }
      // From here on, pass the spill slot's address instead of the value.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal()) {
      if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
                          isa<ExternalSymbolSDNode>(ArgValue) ||
                          isa<FrameIndexSDNode>(ArgValue)))
        ArgValue = ByValArgs[j++];
    }

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));

      const TargetOptions &Options = DAG.getTarget().Options;
      if (Options.EmitCallSiteInfo)
        CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      SDValue DstAddr;
      MachinePointerInfo DstInfo;
      int32_t Offset = VA.getLocMemOffset();

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);

      if (IsTailCall) {
        // Tail calls store into the caller's own incoming-argument area.
        unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
        DstAddr = DAG.getFrameIndex(FI, PtrVT);
        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
        if (!AfterFormalArgLoads) {
          Chain = DAG.getStackArgumentTokenFactor(Chain);
          AfterFormalArgLoads = true;
        }
      } else {
        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
        DstInfo = MachinePointerInfo::getStack(MF, Offset);
      }

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
      MF.getFunction(),
      "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  bool CalleeIsLargeExternalSymbol = false;
    if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
      Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
    else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
      CalleeIsLargeExternalSymbol = true;
    }
  } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();
    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
  }

  // The first call operand is the chain and the second is the target address.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
         "Unexpected CFI type for a direct call");

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // Use software guarded branch for large code model non-indirect calls
  // Tail call to external symbol will have a null CLI.CB and we need another
  // way to determine the callsite type
  bool NeedSWGuarded = false;
      Subtarget.hasStdExtZicfilp() &&
      ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
    NeedSWGuarded = true;

  if (IsTailCall) {
    unsigned CallOpc =
        NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
    SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
    if (CLI.CFIType)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
  if (CLI.CFIType)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // f64 returned in a pair of i32 regs on RV32 soft-float: read the second
    // half and rebuild the pair.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);

      // For big-endian, swap the order when building the pair.
      SDValue Lo = RetValue;
      SDValue Hi = RetValue2;
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);

      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}
25039
// Returns true if every return value can be assigned a location by the
// RISC-V return convention (CC_RISCV with IsRet=true); false tells the
// generic lowering to demote the return to an sret argument instead.
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    // A nonzero result from the CC callback means "cannot assign".
    if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
                 /*IsRet=*/true, Outs[i].OrigTy))
      return false;
  }
  return true;
}
25056
25057SDValue
// Lower the outgoing return value(s) of the current function: assign each
// value to its ABI return location, copy it into the physical register(s),
// and emit the target return node (RET_GLUE or an interrupt-specific
// variant). NOTE(review): part of the signature, the RVLocs declaration and
// the bodies of the reserved-register diagnostics are elided in this
// extraction; the numbered lines are preserved byte-for-byte.
25059                                 bool IsVarArg,
25061                                 const SmallVectorImpl<SDValue> &OutVals,
25062                                 const SDLoc &DL, SelectionDAG &DAG) const {
25064
25065  // Stores the assignment of the return value to a location.
25067
25068  // Info about the registers and stack slot.
25069  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
25070                 *DAG.getContext());
25071
25072  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
25073                    nullptr, CC_RISCV);
25074
25075  if (CallConv == CallingConv::GHC && !RVLocs.empty())
25076    reportFatalUsageError("GHC functions return void only");
25077
25078  SDValue Glue;
25079  SmallVector<SDValue, 4> RetOps(1, Chain);
25080
25081  // Copy the result values into the output registers.
25082  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
25083    SDValue Val = OutVals[OutIdx];
25084    CCValAssign &VA = RVLocs[i];
25085    assert(VA.isRegLoc() && "Can only return in registers!");
25086
25087    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
25088      // Handle returning f64 on RV32D with a soft float ABI.
25089      assert(VA.isRegLoc() && "Expected return via registers");
25090      assert(VA.needsCustom());
25091      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
25092                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
25093      SDValue Lo = SplitF64.getValue(0);
25094      SDValue Hi = SplitF64.getValue(1);
25095
25096      // For big-endian, swap the order of Lo and Hi when returning.
25097      if (!Subtarget.isLittleEndian())
25098        std::swap(Lo, Hi);
25099
// The f64 occupies a consecutive register pair; the second CCValAssign is
// consumed here, hence the extra ++i.
25100      Register RegLo = VA.getLocReg();
25101      Register RegHi = RVLocs[++i].getLocReg();
25102
25103      if (Subtarget.isRegisterReservedByUser(RegLo) ||
25104          Subtarget.isRegisterReservedByUser(RegHi))
25106            MF.getFunction(),
25107            "Return value register required, but has been reserved."});
25108
25109      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
25110      Glue = Chain.getValue(1);
25111      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
25112      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
25113      Glue = Chain.getValue(1);
25114      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
25115    } else {
25116      // Handle a 'normal' return.
25117      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
25118      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
25119
25120      if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
25122            MF.getFunction(),
25123            "Return value register required, but has been reserved."});
25124
25125      // Guarantee that all emitted copies are stuck together.
25126      Glue = Chain.getValue(1);
25127      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
25128    }
25129  }
25130
25131  RetOps[0] = Chain; // Update chain.
25132
25133  // Add the glue node if we have it.
25134  if (Glue.getNode()) {
25135    RetOps.push_back(Glue);
25136  }
25137
// Scalable-vector returns take an elided early-exit path here (line 25140/
// 25141 dropped by the extraction) — presumably a fatal error or special
// lowering; TODO confirm against upstream.
25138  if (any_of(RVLocs,
25139             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
25141
25142  unsigned RetOpc = RISCVISD::RET_GLUE;
25143  // Interrupt service routines use different return instructions.
25144  const Function &Func = DAG.getMachineFunction().getFunction();
25145  if (Func.hasFnAttribute("interrupt")) {
25146    if (!Func.getReturnType()->isVoidTy())
25148          "Functions with the interrupt attribute must have void return type!");
25149
25151    StringRef Kind =
25152        MF.getFunction().getFnAttribute("interrupt").getValueAsString();
25153
25154    if (Kind == "supervisor")
25155      RetOpc = RISCVISD::SRET_GLUE;
25156    else if (Kind == "rnmi") {
25157      assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
25158             "Need Smrnmi extension for rnmi");
25159      RetOpc = RISCVISD::MNRET_GLUE;
25160    } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
25161      assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
25162             "Need Xqciint for qci-(no)nest");
25163      RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
25164    } else
25165      RetOpc = RISCVISD::MRET_GLUE;
25166  }
25167
25168  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
25169}
25170
25171void RISCVTargetLowering::validateCCReservedRegs(
25172 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
25173 MachineFunction &MF) const {
25174 const Function &F = MF.getFunction();
25175
25176 if (llvm::any_of(Regs, [this](auto Reg) {
25177 return Subtarget.isRegisterReservedByUser(Reg.first);
25178 }))
25179 F.getContext().diagnose(DiagnosticInfoUnsupported{
25180 F, "Argument register required, but has been reserved."});
25181}
25182
25183// Check if the result of the node is only used as a return value, as
25184// otherwise we can't perform a tail-call.
// NOTE(review): the signature line (presumably RISCVTargetLowering::
// isUsedByReturnOnly(SDNode *N, SDValue &Chain)) is elided in this
// extraction; the visible body is preserved byte-for-byte.
25186  if (N->getNumValues() != 1)
25187    return false;
25188  if (!N->hasNUsesOfValue(1, 0))
25189    return false;
25190
25191  SDNode *Copy = *N->user_begin();
25192
// Look through a bitcast of the value: a bitcast feeding the return should
// not block tail-call formation, so recurse on its single user.
25193  if (Copy->getOpcode() == ISD::BITCAST) {
25194    return isUsedByReturnOnly(Copy, Chain);
25195  }
25196
25197  // TODO: Handle additional opcodes in order to support tail-calling libcalls
25198  // with soft float ABIs.
25199  if (Copy->getOpcode() != ISD::CopyToReg) {
25200    return false;
25201  }
25202
25203  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
25204  // isn't safe to perform a tail call.
25205  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
25206    return false;
25207
25208  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
25209  bool HasRet = false;
25210  for (SDNode *Node : Copy->users()) {
25211    if (Node->getOpcode() != RISCVISD::RET_GLUE)
25212      return false;
25213    HasRet = true;
25214  }
25215  if (!HasRet)
25216    return false;
25217
// On success, report back the chain the tail call should continue from
// (the CopyToReg's incoming chain).
25218  Chain = Copy->getOperand(0);
25219  return true;
25220}
25221
25223 return CI->isTailCall();
25224}
25225
25226/// getConstraintType - Given a constraint letter, return the type of
25227/// constraint it is for this target.
// NOTE(review): the function signature lines are elided in this extraction;
// the visible body is preserved byte-for-byte.
25230  if (Constraint.size() == 1) {
25231    switch (Constraint[0]) {
25232    default:
25233      break;
25234    case 'f':
25235    case 'R':
25236      return C_RegisterClass;
25237    case 'I':
25238    case 'J':
25239    case 'K':
25240      return C_Immediate;
25241    case 'A':
25242      return C_Memory;
25243    case 's':
25244    case 'S': // A symbolic address
25245      return C_Other;
25246    }
25247  } else {
// Multi-character constraints: RVV register classes ("vr"/"vd"/"vm") and
// compressed-register variants ("cr"/"cR"/"cf").
25248    if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
25249      return C_RegisterClass;
25250    if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
25251      return C_RegisterClass;
25252  }
25253  return TargetLowering::getConstraintType(Constraint);
25254}
25255
25256std::pair<unsigned, const TargetRegisterClass *>
// Resolve an inline-asm register constraint (single-letter, RVV multi-letter,
// or an explicit "{reg}" name, including ABI alias names) to a physical
// register and/or register class for the given value type.
// NOTE(review): the first signature line is elided in this extraction, and
// the "ZZZ_"-prefixed register class names below do not match upstream
// RISC-V register class naming conventions — verify against
// RISCVRegisterInfo.td.
25258                                                  StringRef Constraint,
25259                                                  MVT VT) const {
25260  // First, see if this is a constraint that directly corresponds to a RISC-V
25261  // register class.
25262  if (Constraint.size() == 1) {
25263    switch (Constraint[0]) {
25264    case 'r':
25265      // TODO: Support fixed vectors up to XLen for P extension?
25266      if (VT.isVector())
25267        break;
25268      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
25269        return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
25270      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
25271        return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
25272      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25273        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25274      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
25275    case 'f':
// 'f': an FP register of the widest class the subtarget supports for VT;
// falls back to the GPR-based Zfinx-family classes where applicable.
25276      if (VT == MVT::f16) {
25277        if (Subtarget.hasStdExtZfhmin())
25278          return std::make_pair(0U, &RISCV::FPR16RegClass);
25279        if (Subtarget.hasStdExtZhinxmin())
25280          return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
25281      } else if (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
25282        return std::make_pair(0U, &RISCV::FPR16RegClass);
25283      } else if (VT == MVT::f32) {
25284        if (Subtarget.hasStdExtF())
25285          return std::make_pair(0U, &RISCV::FPR32RegClass);
25286        if (Subtarget.hasStdExtZfinx())
25287          return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
25288      } else if (VT == MVT::f64) {
25289        if (Subtarget.hasStdExtD())
25290          return std::make_pair(0U, &RISCV::FPR64RegClass);
25291        if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25292          return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25293        if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
25294          return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
25295      }
25296      break;
25297    case 'R':
// 'R': an even/odd GPR pair (2*XLen-wide values).
25298      if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
25299          (VT == MVT::i128 && Subtarget.is64Bit()))
25300        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
25301      break;
25302    default:
25303      break;
25304    }
25305  } else if (Constraint == "vr") {
25306    // Check VM and fractional LMUL first so that those types will use that
25307    // class instead of VR.
25308    for (const auto *RC :
25309         {&RISCV::ZZZ_VMRegClass, &RISCV::ZZZ_VRMF8RegClass,
25310          &RISCV::ZZZ_VRMF4RegClass, &RISCV::ZZZ_VRMF2RegClass,
25311          &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
25312          &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
25313          &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
25314          &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
25315          &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
25316          &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
25317          &RISCV::VRN2M4RegClass}) {
25318      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
25319        return std::make_pair(0U, RC);
25320
// Fixed-length vectors are matched through their scalable container type.
25321      if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25322        MVT ContainerVT = getContainerForFixedLengthVector(VT);
25323        if (TRI->isTypeLegalForClass(*RC, ContainerVT))
25324          return std::make_pair(0U, RC);
25325      }
25326    }
25327  } else if (Constraint == "vd") {
25328    // Check VMNoV0 and fractional LMUL first so that those types will use that
25329    // class instead of VRNoV0.
25330    for (const auto *RC :
25331         {&RISCV::ZZZ_VMNoV0RegClass, &RISCV::ZZZ_VRMF8NoV0RegClass,
25332          &RISCV::ZZZ_VRMF4NoV0RegClass, &RISCV::ZZZ_VRMF2NoV0RegClass,
25333          &RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
25334          &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
25335          &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
25336          &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
25337          &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
25338          &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
25339          &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
25340          &RISCV::VRN2M4NoV0RegClass}) {
25341      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
25342        return std::make_pair(0U, RC);
25343
25344      if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25345        MVT ContainerVT = getContainerForFixedLengthVector(VT);
25346        if (TRI->isTypeLegalForClass(*RC, ContainerVT))
25347          return std::make_pair(0U, RC);
25348      }
25349    }
25350  } else if (Constraint == "vm") {
25351    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
25352      return std::make_pair(0U, &RISCV::VMV0RegClass);
25353
25354    if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
25355      MVT ContainerVT = getContainerForFixedLengthVector(VT);
25356      // VT here might be coerced to vector with i8 elements, so we need to
25357      // check if this is a M1 register here instead of checking VMV0RegClass.
25358      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
25359        return std::make_pair(0U, &RISCV::VMV0RegClass);
25360    }
25361  } else if (Constraint == "cr") {
// "c"-prefixed constraints restrict to registers addressable by the
// compressed (RVC) instruction encodings.
25362    if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
25363      return std::make_pair(0U, &RISCV::GPRF16CRegClass);
25364    if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
25365      return std::make_pair(0U, &RISCV::GPRF32CRegClass);
25366    if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25367      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25368    if (!VT.isVector())
25369      return std::make_pair(0U, &RISCV::GPRCRegClass);
25370  } else if (Constraint == "cR") {
25371    if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
25372        (VT == MVT::i128 && Subtarget.is64Bit()))
25373      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25374  } else if (Constraint == "cf") {
25375    if (VT == MVT::f16) {
25376      if (Subtarget.hasStdExtZfhmin())
25377        return std::make_pair(0U, &RISCV::FPR16CRegClass);
25378      if (Subtarget.hasStdExtZhinxmin())
25379        return std::make_pair(0U, &RISCV::GPRF16CRegClass);
25380    } else if (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
25381      return std::make_pair(0U, &RISCV::FPR16CRegClass);
25382    } else if (VT == MVT::f32) {
25383      if (Subtarget.hasStdExtF())
25384        return std::make_pair(0U, &RISCV::FPR32CRegClass);
25385      if (Subtarget.hasStdExtZfinx())
25386        return std::make_pair(0U, &RISCV::GPRF32CRegClass);
25387    } else if (VT == MVT::f64) {
25388      if (Subtarget.hasStdExtD())
25389        return std::make_pair(0U, &RISCV::FPR64CRegClass);
25390      if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
25391        return std::make_pair(0U, &RISCV::GPRPairCRegClass);
25392      if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
25393        return std::make_pair(0U, &RISCV::GPRCRegClass);
25394    }
25395  }
25396
25397  // Clang will correctly decode the usage of register name aliases into their
25398  // official names. However, other frontends like `rustc` do not. This allows
25399  // users of these frontends to use the ABI names for registers in LLVM-style
25400  // register constraints.
25401  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
25402                               .Case("{zero}", RISCV::X0)
25403                               .Case("{ra}", RISCV::X1)
25404                               .Case("{sp}", RISCV::X2)
25405                               .Case("{gp}", RISCV::X3)
25406                               .Case("{tp}", RISCV::X4)
25407                               .Case("{t0}", RISCV::X5)
25408                               .Case("{t1}", RISCV::X6)
25409                               .Case("{t2}", RISCV::X7)
25410                               .Cases({"{s0}", "{fp}"}, RISCV::X8)
25411                               .Case("{s1}", RISCV::X9)
25412                               .Case("{a0}", RISCV::X10)
25413                               .Case("{a1}", RISCV::X11)
25414                               .Case("{a2}", RISCV::X12)
25415                               .Case("{a3}", RISCV::X13)
25416                               .Case("{a4}", RISCV::X14)
25417                               .Case("{a5}", RISCV::X15)
25418                               .Case("{a6}", RISCV::X16)
25419                               .Case("{a7}", RISCV::X17)
25420                               .Case("{s2}", RISCV::X18)
25421                               .Case("{s3}", RISCV::X19)
25422                               .Case("{s4}", RISCV::X20)
25423                               .Case("{s5}", RISCV::X21)
25424                               .Case("{s6}", RISCV::X22)
25425                               .Case("{s7}", RISCV::X23)
25426                               .Case("{s8}", RISCV::X24)
25427                               .Case("{s9}", RISCV::X25)
25428                               .Case("{s10}", RISCV::X26)
25429                               .Case("{s11}", RISCV::X27)
25430                               .Case("{t3}", RISCV::X28)
25431                               .Case("{t4}", RISCV::X29)
25432                               .Case("{t5}", RISCV::X30)
25433                               .Case("{t6}", RISCV::X31)
25434                               .Default(RISCV::NoRegister);
25435  if (XRegFromAlias != RISCV::NoRegister)
25436    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
25437
25438  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
25439  // TableGen record rather than the AsmName to choose registers for InlineAsm
25440  // constraints, plus we want to match those names to the widest floating point
25441  // register type available, manually select floating point registers here.
25442  //
25443  // The second case is the ABI name of the register, so that frontends can also
25444  // use the ABI names in register constraint lists.
25445  if (Subtarget.hasStdExtF()) {
25446    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
25447                        .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
25448                        .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
25449                        .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
25450                        .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
25451                        .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
25452                        .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
25453                        .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
25454                        .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
25455                        .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
25456                        .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
25457                        .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
25458                        .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
25459                        .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
25460                        .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
25461                        .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
25462                        .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
25463                        .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
25464                        .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
25465                        .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
25466                        .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
25467                        .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
25468                        .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
25469                        .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
25470                        .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
25471                        .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
25472                        .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
25473                        .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
25474                        .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
25475                        .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
25476                        .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
25477                        .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
25478                        .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
25479                        .Default(RISCV::NoRegister);
25480    if (FReg != RISCV::NoRegister) {
25481      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
// Translate the matched F-register to the widest subregister view that VT
// requires (F64/F32/F16 share a register number per f-reg).
25482      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
25483        unsigned RegNo = FReg - RISCV::F0_F;
25484        unsigned DReg = RISCV::F0_D + RegNo;
25485        return std::make_pair(DReg, &RISCV::FPR64RegClass);
25486      }
25487      if (VT == MVT::f32 || VT == MVT::Other)
25488        return std::make_pair(FReg, &RISCV::FPR32RegClass);
25489      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
25490        unsigned RegNo = FReg - RISCV::F0_F;
25491        unsigned HReg = RISCV::F0_H + RegNo;
25492        return std::make_pair(HReg, &RISCV::FPR16RegClass);
25493      }
25494    }
25495  }
25496
25497  if (Subtarget.hasVInstructions()) {
25498    Register VReg = StringSwitch<Register>(Constraint.lower())
25499                        .Case("{v0}", RISCV::V0)
25500                        .Case("{v1}", RISCV::V1)
25501                        .Case("{v2}", RISCV::V2)
25502                        .Case("{v3}", RISCV::V3)
25503                        .Case("{v4}", RISCV::V4)
25504                        .Case("{v5}", RISCV::V5)
25505                        .Case("{v6}", RISCV::V6)
25506                        .Case("{v7}", RISCV::V7)
25507                        .Case("{v8}", RISCV::V8)
25508                        .Case("{v9}", RISCV::V9)
25509                        .Case("{v10}", RISCV::V10)
25510                        .Case("{v11}", RISCV::V11)
25511                        .Case("{v12}", RISCV::V12)
25512                        .Case("{v13}", RISCV::V13)
25513                        .Case("{v14}", RISCV::V14)
25514                        .Case("{v15}", RISCV::V15)
25515                        .Case("{v16}", RISCV::V16)
25516                        .Case("{v17}", RISCV::V17)
25517                        .Case("{v18}", RISCV::V18)
25518                        .Case("{v19}", RISCV::V19)
25519                        .Case("{v20}", RISCV::V20)
25520                        .Case("{v21}", RISCV::V21)
25521                        .Case("{v22}", RISCV::V22)
25522                        .Case("{v23}", RISCV::V23)
25523                        .Case("{v24}", RISCV::V24)
25524                        .Case("{v25}", RISCV::V25)
25525                        .Case("{v26}", RISCV::V26)
25526                        .Case("{v27}", RISCV::V27)
25527                        .Case("{v28}", RISCV::V28)
25528                        .Case("{v29}", RISCV::V29)
25529                        .Case("{v30}", RISCV::V30)
25530                        .Case("{v31}", RISCV::V31)
25531                        .Default(RISCV::NoRegister);
25532    if (VReg != RISCV::NoRegister) {
25533      if (TRI->isTypeLegalForClass(RISCV::ZZZ_VMRegClass, VT.SimpleTy))
25534        return std::make_pair(VReg, &RISCV::ZZZ_VMRegClass);
25535      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
25536        return std::make_pair(VReg, &RISCV::VRRegClass);
// For LMUL>1 types, widen the named V register to the matching register
// group (e.g. {v8} + m2 type -> V8M2).
25537      for (const auto *RC :
25538           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
25539        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
25540          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
25541          return std::make_pair(VReg, RC);
25542        }
25543      }
25544    }
25545  }
25546
25547  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
25548}
25549
// Map RISC-V inline-asm memory constraint codes to InlineAsm constraint IDs.
// NOTE(review): the signature lines and the 'A' case's return statement
// (line 25556, presumably returning the 'A' constraint ID) are elided in
// this extraction.
25552  // Currently only support length 1 constraints.
25553  if (ConstraintCode.size() == 1) {
25554    switch (ConstraintCode[0]) {
25555    case 'A':
25557    default:
25558      break;
25559    }
25560  }
25561
25562  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
25563}
25564
// Validate and materialize an inline-asm immediate/symbol operand for the
// RISC-V-specific constraint letters; unsupported constraints fall through
// to the base class (the tail call at line 25602 is elided in this
// extraction, as are the first signature line and the 'S' case body).
25566    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
25567    SelectionDAG &DAG) const {
25568  // Currently only support length 1 constraints.
25569  if (Constraint.size() == 1) {
25570    switch (Constraint[0]) {
25571    case 'I':
25572      // Validate & create a 12-bit signed immediate operand.
25573      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
25574        uint64_t CVal = C->getSExtValue();
25575        if (isInt<12>(CVal))
25576          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
25577                                                    Subtarget.getXLenVT()));
25578      }
25579      return;
25580    case 'J':
25581      // Validate & create an integer zero operand.
25582      if (isNullConstant(Op))
25583        Ops.push_back(
25584            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
25585      return;
25586    case 'K':
25587      // Validate & create a 5-bit unsigned immediate operand.
25588      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
25589        uint64_t CVal = C->getZExtValue();
25590        if (isUInt<5>(CVal))
25591          Ops.push_back(
25592              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
25593      }
25594      return;
25595    case 'S':
25597      return;
25598    default:
25599      break;
25600    }
25601  }
25603}
25604
// Emit the fence required *before* an atomic operation for the given
// ordering. NOTE(review): the first signature line and two conditions
// (lines 25609 and 25614 — presumably the SequentiallyConsistent check
// under Ztso and the load+SeqCst check for the default path) are elided in
// this extraction; confirm against upstream before relying on this text.
25606                                                 Instruction *Inst,
25607                                                 AtomicOrdering Ord) const {
25608  if (Subtarget.hasStdExtZtso()) {
25610      return Builder.CreateFence(Ord);
25611    return nullptr;
25612  }
25613
25615    return Builder.CreateFence(Ord);
// Releasing stores need a leading release fence under the default (WMO)
// memory model.
25616  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
25617    return Builder.CreateFence(AtomicOrdering::Release);
25618  return nullptr;
25619}
25620
// Emit the fence required *after* an atomic operation for the given
// ordering. NOTE(review): the first signature line and the conditions at
// lines 25625/25633 are elided in this extraction.
25622                                                  Instruction *Inst,
25623                                                  AtomicOrdering Ord) const {
25624  if (Subtarget.hasStdExtZtso()) {
25626      return Builder.CreateFence(Ord);
25627    return nullptr;
25628  }
25629
// Acquiring loads need a trailing acquire fence under the default model.
25630  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
25631    return Builder.CreateFence(AtomicOrdering::Acquire);
// Optionally strengthen seq_cst stores with a trailing fence (tuning flag).
25632  if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
25634    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
25635  return nullptr;
25636}
25637
// Choose the IR-level expansion strategy for an atomicrmw instruction.
// NOTE(review): the signature and all return statements (the
// AtomicExpansionKind values) are elided in this extraction; only the
// decision conditions are visible.
25640  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
25641  // point operations can't be used in an lr/sc sequence without breaking the
25642  // forward-progress guarantee.
25643  if (AI->isFloatingPointOperation() ||
25649
25650  // Don't expand forced atomics, we want to have __sync libcalls instead.
25651  if (Subtarget.hasForcedAtomics())
25653
25654  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
// Nand has no AMO encoding; with Zacas (and Zabha for sub-word sizes) it can
// use a cmpxchg-arithmetic expansion, otherwise sub-word forms are masked.
25655  if (AI->getOperation() == AtomicRMWInst::Nand) {
25656    if (Subtarget.hasStdExtZacas() &&
25657        (Size >= 32 || Subtarget.hasStdExtZabha()))
25659    if (Size < 32)
25661  }
25662
// Sub-word (8/16-bit) RMWs need the masked-intrinsic expansion unless the
// subtarget has Zabha byte/halfword AMOs.
25663  if (Size < 32 && !Subtarget.hasStdExtZabha())
25665
25667}
25668
25669static Intrinsic::ID
// Map an AtomicRMW binary op to the matching riscv_masked_atomicrmw_*
// intrinsic used by the sub-word masked expansion.
// NOTE(review): the signature line and several 'case' labels (Xchg, Nand,
// UMax, UMin — inferred from the paired return values) are elided in this
// extraction.
25671  switch (BinOp) {
25672  default:
25673    llvm_unreachable("Unexpected AtomicRMW BinOp");
25675    return Intrinsic::riscv_masked_atomicrmw_xchg;
25676  case AtomicRMWInst::Add:
25677    return Intrinsic::riscv_masked_atomicrmw_add;
25678  case AtomicRMWInst::Sub:
25679    return Intrinsic::riscv_masked_atomicrmw_sub;
25681    return Intrinsic::riscv_masked_atomicrmw_nand;
25682  case AtomicRMWInst::Max:
25683    return Intrinsic::riscv_masked_atomicrmw_max;
25684  case AtomicRMWInst::Min:
25685    return Intrinsic::riscv_masked_atomicrmw_min;
25687    return Intrinsic::riscv_masked_atomicrmw_umax;
25689    return Intrinsic::riscv_masked_atomicrmw_umin;
25690  }
25691}
25692
// Emit the masked-atomicrmw intrinsic call for a sub-word atomicrmw.
// NOTE(review): the first signature line and the intrinsic-declaration
// lines (25716-25718) are elided in this extraction.
25694    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
25695    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
25696  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
25697  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
25698  // mask, as this produces better code than the LR/SC loop emitted by
25699  // int_riscv_masked_atomicrmw_xchg.
25700  if (AI->getOperation() == AtomicRMWInst::Xchg &&
25703    if (CVal->isZero())
25704      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
25705                                     Builder.CreateNot(Mask, "Inv_Mask"),
25706                                     AI->getAlign(), Ord);
25707    if (CVal->isMinusOne())
25708      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
25709                                     AI->getAlign(), Ord);
25710  }
25711
// The ordering is passed to the intrinsic as an XLen-wide integer constant.
25712  unsigned XLen = Subtarget.getXLen();
25713  Value *Ordering =
25714      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
25715  Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
25717      AI->getModule(),
25719
// The intrinsic operates on XLen-wide values; sign-extend the i32 operands
// on RV64 and truncate the result back afterwards.
25720  if (XLen == 64) {
25721    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
25722    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
25723    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
25724  }
25725
25726  Value *Result;
25727
25728  // Must pass the shift amount needed to sign extend the loaded value prior
25729  // to performing a signed comparison for min/max. ShiftAmt is the number of
25730  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
25731  // is the number of bits to left+right shift the value in order to
25732  // sign-extend.
25733  if (AI->getOperation() == AtomicRMWInst::Min ||
25735    const DataLayout &DL = AI->getDataLayout();
25736    unsigned ValWidth =
25737        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
25738    Value *SextShamt =
25739        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
25740    Result = Builder.CreateCall(LrwOpScwLoop,
25741                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
25742  } else {
25743    Result =
25744        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
25745  }
25746
25747  if (XLen == 64)
25748    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
25749  return Result;
25750}
25751
// Choose the IR-level expansion strategy for a cmpxchg instruction.
// NOTE(review): the first signature line, the Size computation (25759) and
// the return statements (25757/25762/25763) are elided in this extraction;
// sub-word cmpxchg presumably takes the masked-intrinsic path unless both
// Zabha and Zacas are available.
25754    const AtomicCmpXchgInst *CI) const {
25755  // Don't expand forced atomics, we want to have __sync libcalls instead.
25756  if (Subtarget.hasForcedAtomics())
25758
25760  if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
25761      (Size == 8 || Size == 16))
25764}
25765
// Emit the riscv_masked_cmpxchg intrinsic call for a sub-word cmpxchg.
// NOTE(review): the first signature line is elided in this extraction.
25767    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
25768    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
25769  unsigned XLen = Subtarget.getXLen();
25770  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
25771  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
// The intrinsic operates on XLen-wide values; sign-extend the i32 operands
// on RV64 and truncate the result back afterwards.
25772  if (XLen == 64) {
25773    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
25774    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
25775    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
25776  }
25777  Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
25778  Value *Result = Builder.CreateIntrinsic(
25779      CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
25780  if (XLen == 64)
25781    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
25782  return Result;
25783}
25784
// Return true if the zero-extend of a gather/scatter index can be removed
// (RVV indexed memory ops always zero-extend their indices).
// NOTE(review): the first signature line is elided in this extraction —
// presumably shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT).
25786                                               EVT DataVT) const {
25787  // We have indexed loads for all supported EEW types. Indices are always
25788  // zero extended.
25789  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
25790         isTypeLegal(Extend.getValueType()) &&
25791         isTypeLegal(Extend.getOperand(0).getValueType()) &&
25792         Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
25793}
25794
// Return true if a saturating FP-to-int conversion (Op) is profitable for
// the given FP source type: the integer operation must be legal/custom and
// the subtarget must have native support for the scalar FP type.
// NOTE(review): the first signature line is elided in this extraction —
// presumably shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT).
25796                                                EVT VT) const {
25797  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
25798    return false;
25799
25800  switch (FPVT.getSimpleVT().SimpleTy) {
25801  case MVT::f16:
25802    return Subtarget.hasStdExtZfhmin();
25803  case MVT::f32:
25804    return Subtarget.hasStdExtF();
25805  case MVT::f64:
25806    return Subtarget.hasStdExtD();
25807  default:
25808    return false;
25809  }
25810}
25811
// Select the jump-table entry encoding. NOTE(review): the signature and the
// return statements (lines 25816-25817 and 25819) are elided in this
// extraction — presumably returning a 4-byte custom encoding for RV64
// non-PIC small code model and the default encoding otherwise.
25813  // If we are using the small code model, we can reduce size of jump table
25814  // entry to 4 bytes.
25815  if (Subtarget.is64Bit() && !isPositionIndependent() &&
25818  }
25820}
25821
// Emit a custom (compact) jump-table entry: a direct reference to the
// target block's symbol. Only valid in the RV64, non-PIC, small-code-model
// configuration asserted below. NOTE(review): the first signature line and
// the assert's message string line (25826) are elided in this extraction.
25823    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
25824    unsigned uid, MCContext &Ctx) const {
25825  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
25827  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
25828}
25829
// Split an ADD/SUB address computation into (Base, Offset) for the
// XTHeadMemIdx indexed load/store forms. NOTE(review): the first signature
// line is elided in this extraction.
25831                                                     SDValue &Offset,
25833                                                     SelectionDAG &DAG) const {
25834  // Target does not support indexed loads.
25835  if (!Subtarget.hasVendorXTHeadMemIdx())
25836    return false;
25837
25838  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
25839    return false;
25840
25841  Base = Op->getOperand(0);
25842  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
25843    int64_t RHSC = RHS->getSExtValue();
25844    if (Op->getOpcode() == ISD::SUB)
25845      RHSC = -(uint64_t)RHSC;
25846
25847    // The constants that can be encoded in the THeadMemIdx instructions
25848    // are of the form (sign_extend(imm5) << imm2).
25849    bool isLegalIndexedOffset = false;
25850    for (unsigned i = 0; i < 4; i++)
25851      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
25852        isLegalIndexedOffset = true;
25853        break;
25854      }
25855
25856    if (!isLegalIndexedOffset)
25857      return false;
25858
25859    Offset = Op->getOperand(1);
25860    return true;
25861  }
25862
25863  return false;
25864}
25865
// TargetLowering hook: try to form a pre-incremented (PRE_INC) addressing
// mode for the load/store N. NOTE(review): the first signature line is
// elided in this extraction.
25867                                                    SDValue &Offset,
25869                                                    SelectionDAG &DAG) const {
25870  EVT VT;
25871  SDValue Ptr;
25872  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
25873    VT = LD->getMemoryVT();
25874    Ptr = LD->getBasePtr();
25875  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
25876    VT = ST->getMemoryVT();
25877    Ptr = ST->getBasePtr();
25878  } else
25879    return false;
25880
// The base pointer itself must be a (base, encodable-offset) computation.
25881  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
25882    return false;
25883
25884  AM = ISD::PRE_INC;
25885  return true;
25886}
25887
// TargetLowering hook: try to form a post-incremented (POST_INC) addressing
// mode for the load/store N, with 'Op' the address-update node.
// NOTE(review): the first signature line and the XCVmem load/store cast at
// line 25897 are elided in this extraction.
25889                                                     SDValue &Base,
25890                                                     SDValue &Offset,
25892                                                     SelectionDAG &DAG) const {
// XCVmem (CORE-V, RV32 only) supports register post-increment: accept an
// ADD whose one operand is the access's base pointer.
25893  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
25894    if (Op->getOpcode() != ISD::ADD)
25895      return false;
25896
25898      Base = LS->getBasePtr();
25899    else
25900      return false;
25901
25902    if (Base == Op->getOperand(0))
25903      Offset = Op->getOperand(1);
25904    else if (Base == Op->getOperand(1))
25905      Offset = Op->getOperand(0);
25906    else
25907      return false;
25908
25909    AM = ISD::POST_INC;
25910    return true;
25911  }
25912
25913  EVT VT;
25914  SDValue Ptr;
25915  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
25916    VT = LD->getMemoryVT();
25917    Ptr = LD->getBasePtr();
25918  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
25919    VT = ST->getMemoryVT();
25920    Ptr = ST->getBasePtr();
25921  } else
25922    return false;
25923
25924  if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
25925    return false;
25926  // Post-indexing updates the base, so it's not a valid transform
25927  // if that's not the same as the load's pointer.
25928  if (Ptr != Base)
25929    return false;
25930
25931  AM = ISD::POST_INC;
25932  return true;
25933}
25934
// Return true if fused multiply-add is faster than separate FMUL+FADD for
// the given (possibly vector) type, i.e. the subtarget has native FMA for
// the scalar element type. NOTE(review): the first signature line is
// elided in this extraction.
25936                                                       EVT VT) const {
25937  EVT SVT = VT.getScalarType();
25938
25939  if (!SVT.isSimple())
25940    return false;
25941
25942  switch (SVT.getSimpleVT().SimpleTy) {
25943  case MVT::f16:
25944    return VT.isVector() ? Subtarget.hasVInstructionsF16()
25945                         : Subtarget.hasStdExtZfhOrZhinx();
25946  case MVT::f32:
25947    return Subtarget.hasStdExtFOrZfinx();
25948  case MVT::f64:
25949    return Subtarget.hasStdExtDOrZdinx();
25950  default:
25951    break;
25952  }
25953
25954  return false;
25955}
25956
// Extension kind required for cmpxchg comparison arguments.
// NOTE(review): the signature line is elided in this extraction —
// presumably getExtendForAtomicCmpSwapArg().
25958  // Zacas will use amocas.w which does not require extension.
25959  return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
25960}
25961
// Extension kind required for an atomicrmw operand of the given operation.
// NOTE(review): the signature line and the 'case' labels at lines
// 25970-25973 (presumably the signed Min/Max and other comparing ops) are
// elided in this extraction.
25963  // Zaamo will use amo<op>.w which does not require extension.
25964  if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
25965    return ISD::ANY_EXTEND;
25966
25967  // Zalrsc pseudo expansions with comparison require sign-extension.
25968  assert(Subtarget.hasStdExtZalrsc());
25969  switch (Op) {
25974    return ISD::SIGN_EXTEND;
25975  default:
25976    break;
25977  }
25978  return ISD::ANY_EXTEND;
25980
// EH: the exception pointer is passed in a0 (X10).
// NOTE(review): the first signature line is elided in this extraction.
25982    const Constant *PersonalityFn) const {
25983  return RISCV::X10;
25984}
25985
// EH: the exception selector is passed in a1 (X11).
// NOTE(review): the first signature line is elided in this extraction.
25987    const Constant *PersonalityFn) const {
25988  return RISCV::X11;
25989}
25990
// Return whether a libcall argument/return of this type should be extended
// to XLen. NOTE(review): the signature line is elided in this extraction.
25992  // Return false to suppress the unnecessary extensions if the LibCall
25993  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
25994  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
25995                                  Type.getSizeInBits() < Subtarget.getXLen()))
25996    return false;
25997
25998  return true;
25999}
26000
// i32 libcall arguments on RV64 are always sign-extended per the ABI; other
// types keep the caller-requested signedness. NOTE(review): the first
// signature line is elided in this extraction.
26002                                                        bool IsSigned) const {
26003  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
26004    return true;
26005
26006  return IsSigned;
26007}
26008
// Return true if a MUL by constant C is better decomposed into shifts and
// adds/subs than kept as a multiply. NOTE(review): the first signature line
// is elided in this extraction.
26010                                                 SDValue C) const {
26011  // Check integral scalar types.
26012  if (!VT.isScalarInteger())
26013    return false;
26014
26015  // Omit the optimization if the sub target has the M extension and the data
26016  // size exceeds XLen.
26017  const bool HasZmmul = Subtarget.hasStdExtZmmul();
26018  if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
26019    return false;
26020
26021  auto *ConstNode = cast<ConstantSDNode>(C);
26022  const APInt &Imm = ConstNode->getAPIntValue();
26023
26024  // Don't do this if the Xqciac extension is enabled and the Imm in simm12.
26025  if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
26026    return false;
26027
26028  // Break the MUL to a SLLI and an ADD/SUB.
26029  if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
26030      (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
26031    return true;
26032
26033  // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
26034  if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
26035      ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
26036       (Imm - 8).isPowerOf2()))
26037    return true;
26038
26039  // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
26040  // a pair of LUI/ADDI.
26041  if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
26042      ConstNode->hasOneUse()) {
26043    APInt ImmS = Imm.ashr(Imm.countr_zero());
26044    if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
26045        (1 - ImmS).isPowerOf2())
26046      return true;
26047  }
26048
26049  return false;
26050}
26051
// Return whether folding (add x, c1) * c2 into x*c2 + c1*c2 is profitable.
// NOTE(review): the first signature line is elided in this extraction.
26053                                                      SDValue ConstNode) const {
26054  // Let the DAGCombiner decide for vectors.
26055  EVT VT = AddNode.getValueType();
26056  if (VT.isVector())
26057    return true;
26058
26059  // Let the DAGCombiner decide for larger types.
26060  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
26061    return true;
26062
26063  // It is worse if c1 is simm12 while c1*c2 is not.
26064  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
26065  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
26066  const APInt &C1 = C1Node->getAPIntValue();
26067  const APInt &C2 = C2Node->getAPIntValue();
26068  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
26069    return false;
26070
26071  // Default to true and let the DAGCombiner decide.
26072  return true;
26073}
26074
// NOTE(review): the function-name line was stripped in extraction; from the
// parameter tail this is the misaligned-memory-access legality hook
// (EVT / address-space / alignment variant). Confirm the exact signature
// against the original source.
26076 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
26077 unsigned *Fast) const {
// Scalar case: both legality and the optional "fast" result follow the
// enableUnalignedScalarMem() subtarget feature.
26078 if (!VT.isVector() || Subtarget.hasStdExtP()) {
26079 if (Fast)
26080 *Fast = Subtarget.enableUnalignedScalarMem();
26081 return Subtarget.enableUnalignedScalarMem();
26082 }
26083
26084 // All vector implementations must support element alignment
26085 EVT ElemVT = VT.getVectorElementType();
26086 if (Alignment >= ElemVT.getStoreSize()) {
26087 if (Fast)
26088 *Fast = 1;
26089 return true;
26090 }
26091
26092 // Note: We lower an unmasked unaligned vector access to an equally sized
26093 // e8 element type access. Given this, we effectively support all unmasked
26094 // misaligned accesses. TODO: Work through the codegen implications of
26095 // allowing such accesses to be formed, and considered fast.
26096 if (Fast)
26097 *Fast = Subtarget.enableUnalignedVectorMem();
26098 return Subtarget.enableUnalignedVectorMem();
26099}
26100
// NOTE(review): the function-name line was stripped in extraction; from the
// parameters and body this is the memcpy/memset type-selection hook that
// returns a preferred vector MVT (or MVT::Other to fall back to scalar
// expansion). Confirm the exact signature against the original source.
26102 LLVMContext &Context, const MemOp &Op,
26103 const AttributeList &FuncAttributes) const {
26104 if (!Subtarget.hasVInstructions())
26105 return MVT::Other;
26106
// Respect noimplicitfloat: vector registers must not be introduced.
26107 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
26108 return MVT::Other;
26109
26110 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
26111 // has an expansion threshold, and we want the number of hardware memory
26112 // operations to correspond roughly to that threshold. LMUL>1 operations
26113 // are typically expanded linearly internally, and thus correspond to more
26114 // than one actual memory operation. Note that store merging and load
26115 // combining will typically form larger LMUL operations from the LMUL1
26116 // operations emitted here, and that's okay because combining isn't
26117 // introducing new memory operations; it's just merging existing ones.
26118 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
26119 const unsigned MinVLenInBytes =
26120 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
26121
26122 if (Op.size() < MinVLenInBytes)
26123 // TODO: Figure out short memops. For the moment, do the default thing
26124 // which ends up using scalar sequences.
26125 return MVT::Other;
26126
26127 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
26128 // fixed vectors.
26129 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
26130 return MVT::Other;
26131
26132 // Prefer i8 for non-zero memset as it allows us to avoid materializing
26133 // a large scalar constant and instead use vmv.v.x/i to do the
26134 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
26135 // maximize the chance we can encode the size in the vsetvli.
26136 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
26137 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
26138
26139 // Do we have sufficient alignment for our preferred VT? If not, revert
26140 // to largest size allowed by our alignment criteria.
26141 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
26142 Align RequiredAlign(PreferredVT.getStoreSize());
26143 if (Op.isFixedDstAlign())
26144 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
26145 if (Op.isMemcpy())
26146 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
// Shrink the element type to the largest one the known alignment permits.
26147 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
26148 }
26149 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
26150}
26151
26153 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
26154 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
26155 bool IsABIRegCopy = CC.has_value();
26156 EVT ValueVT = Val.getValueType();
26157
26158 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
26159 if ((ValueVT == PairVT ||
26160 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
26161 ValueVT == MVT::f64)) &&
26162 NumParts == 1 && PartVT == MVT::Untyped) {
26163 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
26164 MVT XLenVT = Subtarget.getXLenVT();
26165 if (ValueVT == MVT::f64)
26166 Val = DAG.getBitcast(MVT::i64, Val);
26167 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
26168 // Always creating an MVT::Untyped part, so always use
26169 // RISCVISD::BuildGPRPair.
26170 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
26171 return true;
26172 }
26173
26174 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
26175 PartVT == MVT::f32) {
26176 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
26177 // nan, and cast to f32.
26178 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
26179 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
26180 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
26181 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
26182 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
26183 Parts[0] = Val;
26184 return true;
26185 }
26186
26187 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
26188#ifndef NDEBUG
26189 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
26190 [[maybe_unused]] unsigned ValLMUL =
26192 ValNF * RISCV::RVVBitsPerBlock);
26193 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
26194 [[maybe_unused]] unsigned PartLMUL =
26196 PartNF * RISCV::RVVBitsPerBlock);
26197 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
26198 "RISC-V vector tuple type only accepts same register class type "
26199 "TUPLE_INSERT");
26200#endif
26201
26202 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
26203 Val, DAG.getTargetConstant(0, DL, MVT::i32));
26204 Parts[0] = Val;
26205 return true;
26206 }
26207
26208 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
26209 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
26210 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
26211
26212 LLVMContext &Context = *DAG.getContext();
26213 EVT ValueEltVT = ValueVT.getVectorElementType();
26214 EVT PartEltVT = PartVT.getVectorElementType();
26215 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
26216 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
26217 if (PartVTBitSize % ValueVTBitSize == 0) {
26218 assert(PartVTBitSize >= ValueVTBitSize);
26219 // If the element types are different, bitcast to the same element type of
26220 // PartVT first.
26221 // Give an example here, we want copy a <vscale x 1 x i8> value to
26222 // <vscale x 4 x i16>.
26223 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
26224 // subvector, then we can bitcast to <vscale x 4 x i16>.
26225 if (ValueEltVT != PartEltVT) {
26226 if (PartVTBitSize > ValueVTBitSize) {
26227 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
26228 assert(Count != 0 && "The number of element should not be zero.");
26229 EVT SameEltTypeVT =
26230 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
26231 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
26232 }
26233 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
26234 } else {
26235 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
26236 }
26237 Parts[0] = Val;
26238 return true;
26239 }
26240 }
26241
26242 return false;
26243}
26244
26246 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
26247 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
26248 bool IsABIRegCopy = CC.has_value();
26249
26250 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
26251 if ((ValueVT == PairVT ||
26252 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
26253 ValueVT == MVT::f64)) &&
26254 NumParts == 1 && PartVT == MVT::Untyped) {
26255 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
26256 MVT XLenVT = Subtarget.getXLenVT();
26257
26258 SDValue Val = Parts[0];
26259 // Always starting with an MVT::Untyped part, so always use
26260 // RISCVISD::SplitGPRPair
26261 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
26262 Val);
26263 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
26264 Val.getValue(1));
26265 if (ValueVT == MVT::f64)
26266 Val = DAG.getBitcast(ValueVT, Val);
26267 return Val;
26268 }
26269
26270 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
26271 PartVT == MVT::f32) {
26272 SDValue Val = Parts[0];
26273
26274 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
26275 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
26276 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
26277 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
26278 return Val;
26279 }
26280
26281 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
26282 LLVMContext &Context = *DAG.getContext();
26283 SDValue Val = Parts[0];
26284 EVT ValueEltVT = ValueVT.getVectorElementType();
26285 EVT PartEltVT = PartVT.getVectorElementType();
26286
26287 unsigned ValueVTBitSize =
26289 .getSizeInBits()
26291
26292 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
26293 if (PartVTBitSize % ValueVTBitSize == 0) {
26294 assert(PartVTBitSize >= ValueVTBitSize);
26295 EVT SameEltTypeVT = ValueVT;
26296 // If the element types are different, convert it to the same element type
26297 // of PartVT.
26298 // Give an example here, we want copy a <vscale x 1 x i8> value from
26299 // <vscale x 4 x i16>.
26300 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
26301 // then we can extract <vscale x 1 x i8>.
26302 if (ValueEltVT != PartEltVT) {
26303 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
26304 assert(Count != 0 && "The number of element should not be zero.");
26305 SameEltTypeVT =
26306 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
26307 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
26308 }
26309 if (ValueVT.isFixedLengthVector())
26310 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
26311 else
26312 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
26313 return Val;
26314 }
26315 }
26316 return SDValue();
26317}
26318
26319bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
26320 // When aggressively optimizing for code size, we prefer to use a div
26321 // instruction, as it is usually smaller than the alternative sequence.
26322 // TODO: Add vector division?
26323 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
26324 return OptSize && !VT.isVector() &&
26326}
26327
26332
26334 // Scalarize zero_ext and sign_ext might stop match to widening instruction in
26335 // some situation.
26336 unsigned Opc = N->getOpcode();
26338 return false;
26339 return true;
26340}
26341
26342static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
26343 Module *M = IRB.GetInsertBlock()->getModule();
26344 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
26345 M, Intrinsic::thread_pointer, IRB.getPtrTy());
26346 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
26347 IRB.CreateCall(ThreadPointerFunc), Offset);
26348}
26349
26351 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const {
26352 // Fuchsia provides a fixed TLS slot for the stack cookie.
26353 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
26354 if (Subtarget.isTargetFuchsia())
26355 return useTpOffset(IRB, -0x10);
26356
26357 // Android provides a fixed TLS slot for the stack cookie. See the definition
26358 // of TLS_SLOT_STACK_GUARD in
26359 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
26360 if (Subtarget.isTargetAndroid())
26361 return useTpOffset(IRB, -0x18);
26362
26363 Module *M = IRB.GetInsertBlock()->getModule();
26364
26365 if (M->getStackProtectorGuard() == "tls") {
26366 // Users must specify the offset explicitly
26367 int Offset = M->getStackProtectorGuardOffset();
26368 return useTpOffset(IRB, Offset);
26369 }
26370
26371 return TargetLowering::getIRStackGuard(IRB, Libcalls);
26372}
26373
// NOTE(review): the function-name line was stripped in extraction; from the
// body this is an RVV load/store legality hook taking (DataType, Alignment),
// where fixed-length vectors additionally require
// useRVVForFixedLengthVectors. Confirm the exact name against the original.
26375 Align Alignment) const {
26376 if (!Subtarget.hasVInstructions())
26377 return false;
26378
26379 // Only support fixed vectors if we know the minimum vector size.
26380 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
26381 return false;
26382
26383 EVT ScalarType = DataType.getScalarType();
26384 if (!isLegalElementTypeForRVV(ScalarType))
26385 return false;
26386
// Without the unaligned-vector-mem feature, the access must be at least
// element aligned.
26387 if (!Subtarget.enableUnalignedVectorMem() &&
26388 Alignment < ScalarType.getStoreSize())
26389 return false;
26390
26391 return true;
26392}
26393
// NOTE(review): the function-name line was stripped in extraction; from the
// body this is a second RVV legality hook taking (DataType, Alignment) —
// identical to the hook above except it does not restrict fixed-length
// vectors. Confirm the exact name against the original source.
26395 Align Alignment) const {
26396 if (!Subtarget.hasVInstructions())
26397 return false;
26398
26399 EVT ScalarType = DataType.getScalarType();
26400 if (!isLegalElementTypeForRVV(ScalarType))
26401 return false;
26402
// Without the unaligned-vector-mem feature, require element alignment.
26403 if (!Subtarget.enableUnalignedVectorMem() &&
26404 Alignment < ScalarType.getStoreSize())
26405 return false;
26406
26407 return true;
26408}
26409
26413 const TargetInstrInfo *TII) const {
26414 assert(MBBI->isCall() && MBBI->getCFIType() &&
26415 "Invalid call instruction for a KCFI check");
26416 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
26417 MBBI->getOpcode()));
26418
26419 MachineOperand &Target = MBBI->getOperand(0);
26420 Target.setIsRenamable(false);
26421
26422 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
26423 .addReg(Target.getReg())
26424 .addImm(MBBI->getCFIType())
26425 .getInstr();
26426}
26427
26428#define GET_REGISTER_MATCHER
26429#include "RISCVGenAsmMatcher.inc"
26430
26433 const MachineFunction &MF) const {
26435 if (!Reg)
26437 if (!Reg)
26438 return Reg;
26439
26440 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
26441 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
26442 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
26443 StringRef(RegName) + "\"."));
26444 return Reg;
26445}
26446
26449 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
26450
26451 if (NontemporalInfo == nullptr)
26453
26454 // 1 for default value work as __RISCV_NTLH_ALL
26455 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
26456 // 3 -> __RISCV_NTLH_ALL_PRIVATE
26457 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
26458 // 5 -> __RISCV_NTLH_ALL
26459 int NontemporalLevel = 5;
26460 const MDNode *RISCVNontemporalInfo =
26461 I.getMetadata("riscv-nontemporal-domain");
26462 if (RISCVNontemporalInfo != nullptr)
26463 NontemporalLevel =
26465 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
26466 ->getValue())
26467 ->getZExtValue();
26468
26469 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
26470 "RISC-V target doesn't support this non-temporal domain.");
26471
26472 NontemporalLevel -= 2;
26474 if (NontemporalLevel & 0b1)
26475 Flags |= MONontemporalBit0;
26476 if (NontemporalLevel & 0b10)
26477 Flags |= MONontemporalBit1;
26478
26479 return Flags;
26480}
26481
26484
26485 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
26487 TargetFlags |= (NodeFlags & MONontemporalBit0);
26488 TargetFlags |= (NodeFlags & MONontemporalBit1);
26489 return TargetFlags;
26490}
26491
26493 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
26494 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
26495}
26496
26498 if (VT.isVector()) {
26499 EVT SVT = VT.getVectorElementType();
26500 // If the element type is legal we can use cpop.v if it is enabled.
26501 if (isLegalElementTypeForRVV(SVT))
26502 return Subtarget.hasStdExtZvbb();
26503 // Don't consider it fast if the type needs to be legalized or scalarized.
26504 return false;
26505 }
26506
26507 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
26508}
26509
26511 ISD::CondCode Cond) const {
26512 return isCtpopFast(VT) ? 0 : 1;
26513}
26514
26516 const Instruction *I) const {
26517 if (Subtarget.hasStdExtZalasr()) {
26518 if (Subtarget.hasStdExtZtso()) {
26519 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
26520 // should be lowered to plain load/store. The easiest way to do this is
26521 // to say we should insert fences for them, and the fence insertion code
26522 // will just not insert any fences
26523 auto *LI = dyn_cast<LoadInst>(I);
26524 auto *SI = dyn_cast<StoreInst>(I);
26525 if ((LI &&
26526 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
26527 (SI &&
26528 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
26529 // Here, this is a load or store which is seq_cst, and needs a .aq or
26530 // .rl therefore we shouldn't try to insert fences
26531 return false;
26532 }
26533 // Here, we are a TSO inst that isn't a seq_cst load/store
26534 return isa<LoadInst>(I) || isa<StoreInst>(I);
26535 }
26536 return false;
26537 }
26538 // Note that one specific case requires fence insertion for an
26539 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
26540 // than this hook due to limitations in the interface here.
26541 return isa<LoadInst>(I) || isa<StoreInst>(I);
26542}
26543
26545
26546 // GISel support is in progress or complete for these opcodes.
26547 unsigned Op = Inst.getOpcode();
26548 if (Op == Instruction::Add || Op == Instruction::Sub ||
26549 Op == Instruction::And || Op == Instruction::Or ||
26550 Op == Instruction::Xor || Op == Instruction::InsertElement ||
26551 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
26552 Op == Instruction::Freeze || Op == Instruction::Store)
26553 return false;
26554
26555 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
26556 // Mark RVV intrinsic as supported.
26557 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
26558 // GISel doesn't support tuple types yet. It also doesn't suport returning
26559 // a struct containing a scalable vector like vleff.
26560 if (Inst.getType()->isRISCVVectorTupleTy() ||
26561 Inst.getType()->isStructTy())
26562 return true;
26563
26564 for (unsigned i = 0; i < II->arg_size(); ++i)
26565 if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
26566 return true;
26567
26568 return false;
26569 }
26570 if (II->getIntrinsicID() == Intrinsic::vector_extract)
26571 return false;
26572 }
26573
26574 if (Inst.getType()->isScalableTy())
26575 return true;
26576
26577 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
26578 if (Inst.getOperand(i)->getType()->isScalableTy() &&
26579 !isa<ReturnInst>(&Inst))
26580 return true;
26581
26582 return false;
26583}
26584
26585SDValue
26586RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
26587 SelectionDAG &DAG,
26588 SmallVectorImpl<SDNode *> &Created) const {
26589 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
26590 if (isIntDivCheap(N->getValueType(0), Attr))
26591 return SDValue(N, 0); // Lower SDIV as SDIV
26592
26593 // Only perform this transform if short forward branch opt is supported.
26594 if (!Subtarget.hasShortForwardBranchIALU())
26595 return SDValue();
26596 EVT VT = N->getValueType(0);
26597 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
26598 return SDValue();
26599
26600 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
26601 if (Divisor.sgt(2048) || Divisor.slt(-2048))
26602 return SDValue();
26603 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
26604}
26605
26606bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
26607 EVT VT, const APInt &AndMask) const {
26608 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
26609 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
26611}
26612
26614 return Subtarget.getMinimumJumpTableEntries();
26615}
26616
26618 SDValue Value, SDValue Addr,
26619 int JTI,
26620 SelectionDAG &DAG) const {
26621 if (Subtarget.hasStdExtZicfilp()) {
26622 // When Zicfilp enabled, we need to use software guarded branch for jump
26623 // table branch.
26624 SDValue Chain = Value;
26625 // Jump table debug info is only needed if CodeView is enabled.
26627 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
26628 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
26629 }
26630 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
26631}
26632
26633// If an output pattern produces multiple instructions tablegen may pick an
26634// arbitrary type from an instructions destination register class to use for the
26635// VT of that MachineSDNode. This VT may be used to look up the representative
26636// register class. If the type isn't legal, the default implementation will
26637// not find a register class.
26638//
26639// Some integer types smaller than XLen are listed in the GPR register class to
26640// support isel patterns for GISel, but are not legal in SelectionDAG. The
26641// arbitrary type tablegen picks may be one of these smaller types.
26642//
26643// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
26644// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
26645std::pair<const TargetRegisterClass *, uint8_t>
26646RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
26647 MVT VT) const {
26648 switch (VT.SimpleTy) {
26649 default:
26650 break;
26651 case MVT::i8:
26652 case MVT::i16:
26653 case MVT::i32:
26655 case MVT::bf16:
26656 case MVT::f16:
26658 }
26659
26661}
26662
26664
26665#define GET_RISCVVIntrinsicsTable_IMPL
26666#include "RISCVGenSearchableTables.inc"
26667
26668} // namespace llvm::RISCVVIntrinsicsTable
26669
26671
26672 // If the function specifically requests inline stack probes, emit them.
26673 if (MF.getFunction().hasFnAttribute("probe-stack"))
26674 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
26675 "inline-asm";
26676
26677 return false;
26678}
26679
26681 Align StackAlign) const {
26682 // The default stack probe size is 4096 if the function has no
26683 // stack-probe-size attribute.
26684 const Function &Fn = MF.getFunction();
26685 unsigned StackProbeSize =
26686 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
26687 // Round down to the stack alignment.
26688 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
26689 return StackProbeSize ? StackProbeSize : StackAlign.value();
26690}
26691
26692SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
26693 SelectionDAG &DAG) const {
26695 if (!hasInlineStackProbe(MF))
26696 return SDValue();
26697
26698 MVT XLenVT = Subtarget.getXLenVT();
26699 // Get the inputs.
26700 SDValue Chain = Op.getOperand(0);
26701 SDValue Size = Op.getOperand(1);
26702
26704 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
26705 SDLoc dl(Op);
26706 EVT VT = Op.getValueType();
26707
26708 // Construct the new SP value in a GPR.
26709 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
26710 Chain = SP.getValue(1);
26711 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
26712 if (Align)
26713 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
26714 DAG.getSignedConstant(-Align->value(), dl, VT));
26715
26716 // Set the real SP to the new value with a probing loop.
26717 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
26718 return DAG.getMergeValues({SP, Chain}, dl);
26719}
26720
26723 MachineBasicBlock *MBB) const {
26724 MachineFunction &MF = *MBB->getParent();
26725 MachineBasicBlock::iterator MBBI = MI.getIterator();
26726 DebugLoc DL = MBB->findDebugLoc(MBBI);
26727 Register TargetReg = MI.getOperand(0).getReg();
26728
26729 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
26730 bool IsRV64 = Subtarget.is64Bit();
26731 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
26732 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
26733 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
26734
26735 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
26736 MachineBasicBlock *LoopTestMBB =
26737 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
26738 MF.insert(MBBInsertPoint, LoopTestMBB);
26739 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
26740 MF.insert(MBBInsertPoint, ExitMBB);
26741 Register SPReg = RISCV::X2;
26742 Register ScratchReg =
26743 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
26744
26745 // ScratchReg = ProbeSize
26746 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
26747
26748 // LoopTest:
26749 // SUB SP, SP, ProbeSize
26750 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
26751 .addReg(SPReg)
26752 .addReg(ScratchReg);
26753
26754 // s[d|w] zero, 0(sp)
26755 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
26756 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
26757 .addReg(RISCV::X0)
26758 .addReg(SPReg)
26759 .addImm(0);
26760
26761 // BLT TargetReg, SP, LoopTest
26762 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
26763 .addReg(TargetReg)
26764 .addReg(SPReg)
26765 .addMBB(LoopTestMBB);
26766
26767 // Adjust with: MV SP, TargetReg.
26768 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
26769 .addReg(TargetReg)
26770 .addImm(0);
26771
26772 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
26774
26775 LoopTestMBB->addSuccessor(ExitMBB);
26776 LoopTestMBB->addSuccessor(LoopTestMBB);
26777 MBB->addSuccessor(LoopTestMBB);
26778
26779 MI.eraseFromParent();
26780 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
26781 return ExitMBB->begin()->getParent();
26782}
26783
26785 if (Subtarget.hasStdExtFOrZfinx()) {
26786 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
26787 return RCRegs;
26788 }
26789 return {};
26790}
26791
26793 EVT VT = Y.getValueType();
26794
26795 if (VT.isVector())
26796 return false;
26797
26798 return VT.getSizeInBits() <= Subtarget.getXLen();
26799}
26800
26802 SDValue N1) const {
26803 if (!N0.hasOneUse())
26804 return false;
26805
26806 // Avoid reassociating expressions that can be lowered to vector
26807 // multiply accumulate (i.e. add (mul x, y), z)
26808 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::MUL &&
26809 (N0.getValueType().isVector() && Subtarget.hasVInstructions()))
26810 return false;
26811
26812 return true;
26813}
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static constexpr unsigned long long mask(BlockVerifier::State S)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI)
Definition CostModel.cpp:73
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue combineVdota4Accum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(3))
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue compressShuffleOfShuffles(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static const unsigned ZvfbfaVPOps[]
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const unsigned ZvfbfaOps[]
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX, unsigned ShY, bool AddX, unsigned Shift)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static SDValue performVWABDACombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue foldReduceOperandViaVDOTA4(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt, unsigned Shift)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses, bool IsCopyable=false)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1655
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1747
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & set()
Definition BitVector.h:370
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:194
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This class represents a range of values.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:776
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
Helper struct to store a base, index and offset that forms an address.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1957
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2487
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:604
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:551
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Tracks which library functions to use for a particular subtarget.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:358
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
bool isLittleEndian() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override
Returns how the platform's atomic rmw operations expect their input argument to be extended (ZERO_EXT...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const
Return true if a fault-only-first load of the given result type and alignment is legal.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Customize the preferred legalization strategy for certain types.
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:808
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI bool isRISCVVectorTupleTy() const
Definition Type.cpp:146
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SET_FPENV
Sets the current floating-point environment.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ RESET_FPENV
Set floating-point environment to default state.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propatate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:635
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ CTLS
Count leading redundant sign bits.
Definition ISDOpcodes.h:792
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ STRICT_FP_TO_FP16
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
@ STRICT_FP16_TO_FP
@ GET_FPENV
Gets the current floating-point environment.
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:640
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:681
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Addres, End Address Outputs: Output Chain
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:624
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:871
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:875
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
const PseudoInfo * getBaseInfo(unsigned BaseInstr, uint8_t VLMul, uint8_t SEW, bool IsAltFmt=false)
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Opcode_match m_Opc(unsigned Opcode)
auto m_ExactSr(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
TernaryOpc_match< LHS, RHS, IDX > m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1590
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
int isShifted359(T Value, int &Shift)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:410
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:372
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
bool isRISCVVectorTuple() const
Return true if this is a vector value type.
Definition ValueTypes.h:187
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:427
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:290
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:167
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:178
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:312
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:327
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:186
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:363
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:296
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...