LLVM 23.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->hasStdExtP())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
// Emit the machine-node chain that materializes an immediate from a
// RISCVMatInt instruction sequence. Each step consumes the previous step's
// result register (seeded with X0 for the first instruction) and, depending
// on the operand kind, an immediate and/or a second register operand.
// Returns the value of the final instruction in the chain.
// NOTE(review): several `case RISCVMatInt::...:` labels of the switch below
// (original lines 191/195/198) were dropped by the HTML extraction of this
// file; the statements themselves are unchanged.
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
183  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184  for (const RISCVMatInt::Inst &Inst : Seq) {
185    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186    SDNode *Result = nullptr;
187    switch (Inst.getOpndKind()) {
188    case RISCVMatInt::Imm:
      // Immediate-only operand form.
189      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190      break;
      // Register + X0 operand form.
192      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193                                      CurDAG->getRegister(RISCV::X0, VT));
194      break;
      // Register + register form (same source used twice).
196      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197      break;
      // Register + immediate form.
199      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200      break;
201    }
202
203    // Only the first instruction has X0 as its source.
204    SrcReg = SDValue(Result, 0);
205  }
206
207  return SrcReg;
208}
209
// Materialize the constant Imm into a register of type VT.
// Strategy: (1) optionally use the rematerializable PseudoMovImm for
// two-instruction sequences, (2) try a shorter two-register
// (AddOpc (SLLI X, ShiftAmt), X) form when the plain sequence is long,
// (3) otherwise emit the plain sequence via selectImmSeq.
// NOTE(review): the declarations of `Seq` and `SeqLo` (original lines
// 212/228, presumably RISCVMatInt::InstSeq) were dropped by the HTML
// extraction of this file; only comments were added here.
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211                         int64_t Imm, const RISCVSubtarget &Subtarget) {
213
214  // Use a rematerializable pseudo instruction for short sequences if enabled.
215  if (Seq.size() == 2 && UsePseudoMovImm)
216    return SDValue(
217        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219        0);
220
221  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222  // worst an LUI+ADDIW. This will require an extra register, but avoids a
223  // constant pool.
224  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225  // low and high 32 bits are the same and bit 31 and 63 are set.
226  if (Seq.size() > 3) {
227    unsigned ShiftAmt, AddOpc;
229        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    // Only profitable if the low-part sequence plus SLLI+ADD beats Seq.
230    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233      SDValue SLLI = SDValue(
234          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236          0);
237      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238    }
239  }
240
241  // Otherwise, use the original sequence.
242  return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
// Select an RVV segment-load (vlseg<NF>) intrinsic node into its target
// pseudo instruction. The pseudo produces an untyped tuple register
// (result 0) and a chain (result 1); both replace the corresponding
// results of Node, which is then removed.
// NOTE(review): the declarations of `LMUL` and `Operands` (original lines
// 290/293) were dropped by the HTML extraction of this file.
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286                                    bool IsStrided) {
287  SDLoc DL(Node);
288  MVT VT = Node->getSimpleValueType(0);
  // SEW is encoded as its log2 in the intrinsic's last operand.
289  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
291
292  unsigned CurOp = 2;
294
  // First vector operand; presumably the passthru value — matches the note
  // in addVectorLoadStoreOperands that all loads have passthru operands.
295  Operands.push_back(Node->getOperand(CurOp++));
296
297  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298                             Operands, /*IsLoad=*/true);
299
300  const RISCV::VLSEGPseudo *P =
301      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302                            static_cast<unsigned>(LMUL));
303  MachineSDNode *Load =
304      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
  // Carry the original intrinsic's memory operand over to the pseudo.
306  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310  CurDAG->RemoveDeadNode(Node);
311}
312
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320
321 unsigned CurOp = 2;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
// Select an RVV indexed segment-load (vluxseg/vloxseg) intrinsic node into
// its target pseudo. IsOrdered distinguishes ordered (vlox) from unordered
// (vlux) indexing. Results 0 (tuple) and 1 (chain) replace Node's results.
// NOTE(review): the declarations of `LMUL`, `Operands` and `IndexLMUL`
// (original lines 349/352/373) were dropped by the HTML extraction.
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345                                     bool IsOrdered) {
346  SDLoc DL(Node);
347  MVT VT = Node->getSimpleValueType(0);
348  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
350
351  unsigned CurOp = 2;
353
  // First vector operand; presumably the passthru value.
354  Operands.push_back(Node->getOperand(CurOp++));
355
356  MVT IndexVT;
357  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358                             /*IsStridedOrIndexed*/ true, Operands,
359                             /*IsLoad=*/true, &IndexVT);
360
361#ifndef NDEBUG
  // Sanity check: the index vector must have the same (minimum) element
  // count as the data type implied by SEW and LMUL.
362  // Number of element = RVVBitsPerBlock * LMUL / SEW
363  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365  if (DecodedLMUL.second)
366    ContainedTyNumElts /= DecodedLMUL.first;
367  else
368    ContainedTyNumElts *= DecodedLMUL.first;
369  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370         "Element count mismatch");
371#endif
372
374  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // RV32 cannot encode 64-bit index elements; reject rather than miscompile.
375  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376    reportFatalUsageError("The V extension does not support EEW=64 for index "
377                          "values when XLEN=32");
378  }
379  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381      static_cast<unsigned>(IndexLMUL));
382  MachineSDNode *Load =
383      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
385  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389  CurDAG->RemoveDeadNode(Node);
390}
391
// Select an RVV segment-store (vsseg<NF>) intrinsic node into its target
// pseudo. The store pseudo produces only a chain, so a plain ReplaceNode
// suffices. The stored data type is taken from operand 2 of the intrinsic.
// NOTE(review): the declarations of `LMUL` and `Operands` (original lines
// 397/400) were dropped by the HTML extraction of this file.
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393                                    bool IsStrided) {
394  SDLoc DL(Node);
395  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  // SEW is encoded as its log2 in the intrinsic's last operand.
396  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
398
399  unsigned CurOp = 2;
401
  // Store data operand (the tuple being stored).
402  Operands.push_back(Node->getOperand(CurOp++));
403
404  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405                             Operands);
406
407  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409  MachineSDNode *Store =
410      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
  // Carry the original intrinsic's memory operand over to the pseudo.
412  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414  ReplaceNode(Node, Store);
415}
416
// Select an RVV indexed segment-store (vsuxseg/vsoxseg) intrinsic node into
// its target pseudo. IsOrdered distinguishes ordered (vsox) from unordered
// (vsux) indexing. Mirrors selectVLXSEG but with IsLoad=false.
// NOTE(review): the declarations of `LMUL`, `Operands` and `IndexLMUL`
// (original lines 422/425/446) were dropped by the HTML extraction.
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418                                     bool IsOrdered) {
419  SDLoc DL(Node);
420  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
421  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
423
424  unsigned CurOp = 2;
426
  // Store data operand (the tuple being stored).
427  Operands.push_back(Node->getOperand(CurOp++));
428
429  MVT IndexVT;
430  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431                             /*IsStridedOrIndexed*/ true, Operands,
432                             /*IsLoad=*/false, &IndexVT);
433
434#ifndef NDEBUG
  // Sanity check: the index vector must have the same (minimum) element
  // count as the data type implied by SEW and LMUL.
435  // Number of element = RVVBitsPerBlock * LMUL / SEW
436  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438  if (DecodedLMUL.second)
439    ContainedTyNumElts /= DecodedLMUL.first;
440  else
441    ContainedTyNumElts *= DecodedLMUL.first;
442  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443         "Element count mismatch");
444#endif
445
447  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // RV32 cannot encode 64-bit index elements; reject rather than miscompile.
448  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449    reportFatalUsageError("The V extension does not support EEW=64 for index "
450                          "values when XLEN=32");
451  }
452  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454      static_cast<unsigned>(IndexLMUL));
455  MachineSDNode *Store =
456      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
458  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460  ReplaceNode(Node, Store);
461}
462
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, it means that extracts
709 // the X[Msb] bit and sign-extend it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
828 if (AM == ISD::UNINDEXED)
829 return false;
830
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
891 SDValue Lo, SDValue Hi) {
892 SDValue Ops[] = {
893 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
894 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
895 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
896
897 return SDValue(
898 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
899}
900
901// Helper to extract Lo and Hi values from a GPR pair.
// Returns the {even, odd} i32 subregister values of the untyped pair.
// Inverse of buildGPRPair above.
// NOTE(review): the declarator line (original line 903) naming this helper
// `extractGPRPair` and its parameters (CurDAG, DL, Pair — see the call site
// in the WMACC matcher below) was dropped by the HTML extraction; the
// statements themselves are unchanged.
902static std::pair<SDValue, SDValue>
904  SDValue Lo =
905      CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
906  SDValue Hi =
907      CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
908  return {Lo, Hi};
909}
910
911// Try to match WMACC pattern: ADDD where one operand pair comes from a
912// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
914 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
915
916 SDValue Op0Lo = Node->getOperand(0);
917 SDValue Op0Hi = Node->getOperand(1);
918 SDValue Op1Lo = Node->getOperand(2);
919 SDValue Op1Hi = Node->getOperand(3);
920
921 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
922 unsigned Opc = Lo.getOpcode();
923 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
924 Opc != RISCVISD::WMULSU)
925 return false;
926 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
927 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
928 };
929
930 SDNode *MulNode = nullptr;
931 SDValue AddLo, AddHi;
932
933 // Check if first operand pair is a supported multiply with single use.
934 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
935 MulNode = Op0Lo.getNode();
936 AddLo = Op1Lo;
937 AddHi = Op1Hi;
938 }
939 // ADDD is commutative. Check if second operand pair is a supported multiply
940 // with single use.
941 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
942 MulNode = Op1Lo.getNode();
943 AddLo = Op0Lo;
944 AddHi = Op0Hi;
945 } else {
946 return false;
947 }
948
949 unsigned Opc;
950 switch (MulNode->getOpcode()) {
951 default:
952 llvm_unreachable("Unexpected multiply opcode");
953 case ISD::UMUL_LOHI:
954 Opc = RISCV::WMACCU;
955 break;
956 case ISD::SMUL_LOHI:
957 Opc = RISCV::WMACC;
958 break;
959 case RISCVISD::WMULSU:
960 Opc = RISCV::WMACCSU;
961 break;
962 }
963
964 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
965
966 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
967 SDValue M0 = MulNode->getOperand(0);
968 SDValue M1 = MulNode->getOperand(1);
969 MachineSDNode *New =
970 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
971
972 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
975 CurDAG->RemoveDeadNode(Node);
976 return true;
977}
978
979static Register getTileReg(uint64_t TileNum) {
980 assert(TileNum <= 15 && "Invalid tile number");
981 return RISCV::T0 + TileNum;
982}
983
985 if (!Subtarget->hasVInstructions())
986 return;
987
988 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
989
990 SDLoc DL(Node);
991 unsigned IntNo = Node->getConstantOperandVal(1);
992
993 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
994 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
995 "Unexpected vsetvli intrinsic");
996
997 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
998 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
999 SDValue SEWOp =
1000 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
1001 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
1002 Node->getOperand(4), Node->getOperand(5),
1003 Node->getOperand(8), SEWOp,
1004 Node->getOperand(0)};
1005
1006 unsigned Opcode;
1007 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
1008 switch (LMulSDNode->getSExtValue()) {
1009 case 5:
1010 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1011 : RISCV::PseudoSF_VC_I_SE_MF8;
1012 break;
1013 case 6:
1014 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1015 : RISCV::PseudoSF_VC_I_SE_MF4;
1016 break;
1017 case 7:
1018 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1019 : RISCV::PseudoSF_VC_I_SE_MF2;
1020 break;
1021 case 0:
1022 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1023 : RISCV::PseudoSF_VC_I_SE_M1;
1024 break;
1025 case 1:
1026 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1027 : RISCV::PseudoSF_VC_I_SE_M2;
1028 break;
1029 case 2:
1030 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1031 : RISCV::PseudoSF_VC_I_SE_M4;
1032 break;
1033 case 3:
1034 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1035 : RISCV::PseudoSF_VC_I_SE_M8;
1036 break;
1037 }
1038
1039 ReplaceNode(Node, CurDAG->getMachineNode(
1040 Opcode, DL, Node->getSimpleValueType(0), Operands));
1041}
1042
1043static unsigned getSegInstNF(unsigned Intrinsic) {
1044#define INST_NF_CASE(NAME, NF) \
1045 case Intrinsic::riscv_##NAME##NF: \
1046 return NF;
1047#define INST_NF_CASE_MASK(NAME, NF) \
1048 case Intrinsic::riscv_##NAME##NF##_mask: \
1049 return NF;
1050#define INST_NF_CASE_FF(NAME, NF) \
1051 case Intrinsic::riscv_##NAME##NF##ff: \
1052 return NF;
1053#define INST_NF_CASE_FF_MASK(NAME, NF) \
1054 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1055 return NF;
1056#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1057 MACRO_NAME(NAME, 2) \
1058 MACRO_NAME(NAME, 3) \
1059 MACRO_NAME(NAME, 4) \
1060 MACRO_NAME(NAME, 5) \
1061 MACRO_NAME(NAME, 6) \
1062 MACRO_NAME(NAME, 7) \
1063 MACRO_NAME(NAME, 8)
1064#define INST_ALL_NF_CASE(NAME) \
1065 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1066 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1067#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1068 INST_ALL_NF_CASE(NAME) \
1069 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1070 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1071 switch (Intrinsic) {
1072 default:
1073 llvm_unreachable("Unexpected segment load/store intrinsic");
1075 INST_ALL_NF_CASE(vlsseg)
1076 INST_ALL_NF_CASE(vloxseg)
1077 INST_ALL_NF_CASE(vluxseg)
1078 INST_ALL_NF_CASE(vsseg)
1079 INST_ALL_NF_CASE(vssseg)
1080 INST_ALL_NF_CASE(vsoxseg)
1081 INST_ALL_NF_CASE(vsuxseg)
1082 }
1083}
1084
/// Return true if \p Val can be materialized with a single PLI instruction,
/// i.e. the 32-bit value consists of two identical 16-bit halves that are
/// either a simm10 (PLI.H) or themselves two identical bytes (PLI.B).
static bool isApplicableToPLI(int Val) {
  int16_t HalfHi = static_cast<int16_t>(Val >> 16);
  int16_t HalfLo = static_cast<int16_t>(Val);
  // Both 16-bit halves must match for either packed form.
  if (HalfHi != HalfLo)
    return false;

  // PLI.H takes a signed 10-bit immediate replicated into each half.
  bool IsSimm10 = HalfHi >= -512 && HalfHi <= 511;
  // PLI.B needs all four bytes equal; the halves already match, so it is
  // enough to compare the two bytes of the low half.
  int8_t ByteHi = static_cast<int8_t>(HalfLo >> 8);
  int8_t ByteLo = static_cast<int8_t>(Val);
  return IsSimm10 || ByteHi == ByteLo;
}
1096
1098 // If we have a custom node, we have already selected.
1099 if (Node->isMachineOpcode()) {
1100 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1101 Node->setNodeId(-1);
1102 return;
1103 }
1104
1105 // Instruction Selection not handled by the auto-generated tablegen selection
1106 // should be handled here.
1107 unsigned Opcode = Node->getOpcode();
1108 MVT XLenVT = Subtarget->getXLenVT();
1109 SDLoc DL(Node);
1110 MVT VT = Node->getSimpleValueType(0);
1111
1112 bool HasBitTest = Subtarget->hasBEXTILike();
1113
1114 switch (Opcode) {
1115 case ISD::Constant: {
1116 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1117 auto *ConstNode = cast<ConstantSDNode>(Node);
1118 if (ConstNode->isZero()) {
1119 SDValue New =
1120 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1121 ReplaceNode(Node, New.getNode());
1122 return;
1123 }
1124 int64_t Imm = ConstNode->getSExtValue();
1125 // If only the lower 8 bits are used, try to convert this to a simm6 by
1126 // sign-extending bit 7. This is neutral without the C extension, and
1127 // allows C.LI to be used if C is present.
1128 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1130 Imm = SignExtend64<8>(Imm);
1131 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1132 // by sign extending bit 15.
1133 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1135 Imm = SignExtend64<16>(Imm);
1136 // If the upper 32-bits are not used try to convert this into a simm32 by
1137 // sign extending bit 32.
1138 else if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1139 Imm = SignExtend64<32>(Imm);
1140
1141 if (VT == MVT::i64 && Subtarget->hasStdExtP() && isApplicableToPLI(Imm) &&
1142 hasAllWUsers(Node)) {
1143 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1144 // can simply copy lower 32 bits to higher 32 bits to make it able to
1145 // rematerialize to PLI_B or PLI_H
1146 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1147 }
1148
1149 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1150 return;
1151 }
1152 case ISD::ConstantFP: {
1153 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1154
1155 bool Is64Bit = Subtarget->is64Bit();
1156 bool HasZdinx = Subtarget->hasStdExtZdinx();
1157
1158 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1159 SDValue Imm;
1160 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1161 // create an integer immediate.
1162 if (APF.isPosZero() || NegZeroF64) {
1163 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1164 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1165 else
1166 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1167 } else {
1168 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1169 *Subtarget);
1170 }
1171
1172 unsigned Opc;
1173 switch (VT.SimpleTy) {
1174 default:
1175 llvm_unreachable("Unexpected size");
1176 case MVT::bf16:
1177 assert(Subtarget->hasStdExtZfbfmin());
1178 Opc = RISCV::FMV_H_X;
1179 break;
1180 case MVT::f16:
1181 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1182 break;
1183 case MVT::f32:
1184 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1185 break;
1186 case MVT::f64:
1187 // For RV32, we can't move from a GPR, we need to convert instead. This
1188 // should only happen for +0.0 and -0.0.
1189 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1190 if (HasZdinx)
1191 Opc = RISCV::COPY;
1192 else
1193 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1194 break;
1195 }
1196
1197 SDNode *Res;
1198 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1199 Res =
1200 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1201 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1202 Res =
1203 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1204 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1205 Res = CurDAG->getMachineNode(
1206 Opc, DL, VT, Imm,
1207 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1208 else
1209 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1210
1211 // For f64 -0.0, we need to insert a fneg.d idiom.
1212 if (NegZeroF64) {
1213 Opc = RISCV::FSGNJN_D;
1214 if (HasZdinx)
1215 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1216 Res =
1217 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1218 }
1219
1220 ReplaceNode(Node, Res);
1221 return;
1222 }
1223 case RISCVISD::BuildGPRPair:
1224 case RISCVISD::BuildPairF64: {
1225 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1226 break;
1227
1228 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1229 "BuildPairF64 only handled here on rv32i_zdinx");
1230
1231 SDValue N =
1232 buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
1233 ReplaceNode(Node, N.getNode());
1234 return;
1235 }
1236 case RISCVISD::SplitGPRPair:
1237 case RISCVISD::SplitF64: {
1238 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1239 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1240 "SplitF64 only handled here on rv32i_zdinx");
1241
1242 if (!SDValue(Node, 0).use_empty()) {
1243 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1244 Node->getValueType(0),
1245 Node->getOperand(0));
1246 ReplaceUses(SDValue(Node, 0), Lo);
1247 }
1248
1249 if (!SDValue(Node, 1).use_empty()) {
1250 SDValue Hi = CurDAG->getTargetExtractSubreg(
1251 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1252 ReplaceUses(SDValue(Node, 1), Hi);
1253 }
1254
1255 CurDAG->RemoveDeadNode(Node);
1256 return;
1257 }
1258
1259 assert(Opcode != RISCVISD::SplitGPRPair &&
1260 "SplitGPRPair should already be handled");
1261
1262 if (!Subtarget->hasStdExtZfa())
1263 break;
1264 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1265 "Unexpected subtarget");
1266
1267 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1268 if (!SDValue(Node, 0).use_empty()) {
1269 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1270 Node->getOperand(0));
1271 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1272 }
1273 if (!SDValue(Node, 1).use_empty()) {
1274 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1275 Node->getOperand(0));
1276 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1277 }
1278
1279 CurDAG->RemoveDeadNode(Node);
1280 return;
1281 }
1282 case ISD::SHL: {
1283 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1284 if (!N1C)
1285 break;
1286 SDValue N0 = Node->getOperand(0);
1287 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1289 break;
1290 unsigned ShAmt = N1C->getZExtValue();
1291 uint64_t Mask = N0.getConstantOperandVal(1);
1292
1293 if (isShiftedMask_64(Mask)) {
1294 unsigned XLen = Subtarget->getXLen();
1295 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1296 unsigned TrailingZeros = llvm::countr_zero(Mask);
1297 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1298 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1299 // where C2 has 32 leading zeros and C3 trailing zeros.
1300 SDNode *SRLIW = CurDAG->getMachineNode(
1301 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1302 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1303 SDNode *SLLI = CurDAG->getMachineNode(
1304 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1305 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1306 ReplaceNode(Node, SLLI);
1307 return;
1308 }
1309 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1310 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1311 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1312 // where C2 has C4 leading zeros and no trailing zeros.
1313 // This is profitable if the "and" was to be lowered to
1314 // (srli (slli X, C4), C4) and not (andi X, C2).
1315 // For "LeadingZeros == 32":
1316 // - with Zba it's just (slli.uw X, C)
1317 // - without Zba a tablegen pattern applies the very same
1318 // transform as we would have done here
1319 SDNode *SLLI = CurDAG->getMachineNode(
1320 RISCV::SLLI, DL, VT, N0.getOperand(0),
1321 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1322 SDNode *SRLI = CurDAG->getMachineNode(
1323 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1324 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1325 ReplaceNode(Node, SRLI);
1326 return;
1327 }
1328 }
1329 break;
1330 }
1331 case ISD::SRL: {
1332 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1333 if (!N1C)
1334 break;
1335 SDValue N0 = Node->getOperand(0);
1336 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1337 break;
1338 unsigned ShAmt = N1C->getZExtValue();
1339 uint64_t Mask = N0.getConstantOperandVal(1);
1340
1341 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1342 // 32 leading zeros and C3 trailing zeros.
1343 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1344 unsigned XLen = Subtarget->getXLen();
1345 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1346 unsigned TrailingZeros = llvm::countr_zero(Mask);
1347 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1348 SDNode *SRLIW = CurDAG->getMachineNode(
1349 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1350 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1353 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1354 ReplaceNode(Node, SLLI);
1355 return;
1356 }
1357 }
1358
1359 // Optimize (srl (and X, C2), C) ->
1360 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1361 // Where C2 is a mask with C3 trailing ones.
1362 // Taking into account that the C2 may have had lower bits unset by
1363 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1364 // This pattern occurs when type legalizing right shifts for types with
1365 // less than XLen bits.
1366 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1367 if (!isMask_64(Mask))
1368 break;
1369 unsigned TrailingOnes = llvm::countr_one(Mask);
1370 if (ShAmt >= TrailingOnes)
1371 break;
1372 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1373 if (TrailingOnes == 32) {
1374 SDNode *SRLI = CurDAG->getMachineNode(
1375 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1376 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1377 ReplaceNode(Node, SRLI);
1378 return;
1379 }
1380
1381 // Only do the remaining transforms if the AND has one use.
1382 if (!N0.hasOneUse())
1383 break;
1384
1385 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1386 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1387 SDNode *BEXTI = CurDAG->getMachineNode(
1388 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1389 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1390 ReplaceNode(Node, BEXTI);
1391 return;
1392 }
1393
1394 const unsigned Msb = TrailingOnes - 1;
1395 const unsigned Lsb = ShAmt;
1396 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1397 return;
1398
1399 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1400 SDNode *SLLI =
1401 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1402 CurDAG->getTargetConstant(LShAmt, DL, VT));
1403 SDNode *SRLI = CurDAG->getMachineNode(
1404 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1405 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1406 ReplaceNode(Node, SRLI);
1407 return;
1408 }
1409 case ISD::SRA: {
1411 return;
1412
1414 return;
1415
1416 // Optimize (sra (sext_inreg X, i16), C) ->
1417 // (srai (slli X, (XLen-16), (XLen-16) + C)
1418 // And (sra (sext_inreg X, i8), C) ->
1419 // (srai (slli X, (XLen-8), (XLen-8) + C)
1420 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1421 // This transform matches the code we get without Zbb. The shifts are more
1422 // compressible, and this can help expose CSE opportunities in the sdiv by
1423 // constant optimization.
1424 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1425 if (!N1C)
1426 break;
1427 SDValue N0 = Node->getOperand(0);
1428 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1429 break;
1430 unsigned ShAmt = N1C->getZExtValue();
1431 unsigned ExtSize =
1432 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1433 // ExtSize of 32 should use sraiw via tablegen pattern.
1434 if (ExtSize >= 32 || ShAmt >= ExtSize)
1435 break;
1436 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1437 SDNode *SLLI =
1438 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1439 CurDAG->getTargetConstant(LShAmt, DL, VT));
1440 SDNode *SRAI = CurDAG->getMachineNode(
1441 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1442 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1443 ReplaceNode(Node, SRAI);
1444 return;
1445 }
1446 case ISD::OR: {
1448 return;
1449
1450 break;
1451 }
1452 case ISD::XOR:
1454 return;
1455
1456 break;
1457 case ISD::AND: {
1458 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1459 if (!N1C)
1460 break;
1461
1462 SDValue N0 = Node->getOperand(0);
1463
1464 bool LeftShift = N0.getOpcode() == ISD::SHL;
1465 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1466 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1467 if (!C)
1468 break;
1469 unsigned C2 = C->getZExtValue();
1470 unsigned XLen = Subtarget->getXLen();
1471 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1472
1473 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1474 // shift pair might offer more compression opportunities.
1475 // TODO: We could check for C extension here, but we don't have many lit
1476 // tests with the C extension enabled so not checking gets better
1477 // coverage.
1478 // TODO: What if ANDI faster than shift?
1479 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1480
1481 uint64_t C1 = N1C->getZExtValue();
1482
1483 // Clear irrelevant bits in the mask.
1484 if (LeftShift)
1486 else
1487 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1488
1489 // Some transforms should only be done if the shift has a single use or
1490 // the AND would become (srli (slli X, 32), 32)
1491 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1492
1493 SDValue X = N0.getOperand(0);
1494
1495 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1496 // with c3 leading zeros.
1497 if (!LeftShift && isMask_64(C1)) {
1498 unsigned Leading = XLen - llvm::bit_width(C1);
1499 if (C2 < Leading) {
1500 // If the number of leading zeros is C2+32 this can be SRLIW.
1501 if (C2 + 32 == Leading) {
1502 SDNode *SRLIW = CurDAG->getMachineNode(
1503 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1504 ReplaceNode(Node, SRLIW);
1505 return;
1506 }
1507
1508 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1509 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1510 //
1511 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1512 // legalized and goes through DAG combine.
1513 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1514 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1515 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1516 SDNode *SRAIW =
1517 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1518 CurDAG->getTargetConstant(31, DL, VT));
1519 SDNode *SRLIW = CurDAG->getMachineNode(
1520 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1521 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1522 ReplaceNode(Node, SRLIW);
1523 return;
1524 }
1525
1526 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1527 // available.
1528 // Transform (and (srl x, C2), C1)
1529 // -> (<bfextract> x, msb, lsb)
1530 //
1531 // Make sure to keep this below the SRLIW cases, as we always want to
1532 // prefer the more common instruction.
1533 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1534 const unsigned Lsb = C2;
1535 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1536 return;
1537
1538 // (srli (slli x, c3-c2), c3).
1539 // Skip if we could use (zext.w (sraiw X, C2)).
1540 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1541 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1542 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1543 // Also Skip if we can use bexti or th.tst.
1544 Skip |= HasBitTest && Leading == XLen - 1;
1545 if (OneUseOrZExtW && !Skip) {
1546 SDNode *SLLI = CurDAG->getMachineNode(
1547 RISCV::SLLI, DL, VT, X,
1548 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1549 SDNode *SRLI = CurDAG->getMachineNode(
1550 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1551 CurDAG->getTargetConstant(Leading, DL, VT));
1552 ReplaceNode(Node, SRLI);
1553 return;
1554 }
1555 }
1556 }
1557
1558 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1559 // shifted by c2 bits with c3 leading zeros.
1560 if (LeftShift && isShiftedMask_64(C1)) {
1561 unsigned Leading = XLen - llvm::bit_width(C1);
1562
1563 if (C2 + Leading < XLen &&
1564 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1565 // Use slli.uw when possible.
1566 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1567 SDNode *SLLI_UW =
1568 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1569 CurDAG->getTargetConstant(C2, DL, VT));
1570 ReplaceNode(Node, SLLI_UW);
1571 return;
1572 }
1573
1574 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1575 // available.
1576 // Transform (and (shl x, c2), c1)
1577 // -> (<bfinsert> x, msb, lsb)
1578 // e.g.
1579 // (and (shl x, 12), 0x00fff000)
1580 // If XLen = 32 and C2 = 12, then
1581 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1582 const unsigned Msb = XLen - Leading - 1;
1583 const unsigned Lsb = C2;
1584 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1585 return;
1586
1587 if (OneUseOrZExtW && !IsCANDI) {
1588 // (packh x0, X)
1589 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1590 SDNode *PACKH = CurDAG->getMachineNode(
1591 RISCV::PACKH, DL, VT,
1592 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1593 ReplaceNode(Node, PACKH);
1594 return;
1595 }
1596 // (srli (slli c2+c3), c3)
1597 SDNode *SLLI = CurDAG->getMachineNode(
1598 RISCV::SLLI, DL, VT, X,
1599 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1600 SDNode *SRLI = CurDAG->getMachineNode(
1601 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1602 CurDAG->getTargetConstant(Leading, DL, VT));
1603 ReplaceNode(Node, SRLI);
1604 return;
1605 }
1606 }
1607 }
1608
1609 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1610 // shifted mask with c2 leading zeros and c3 trailing zeros.
1611 if (!LeftShift && isShiftedMask_64(C1)) {
1612 unsigned Leading = XLen - llvm::bit_width(C1);
1613 unsigned Trailing = llvm::countr_zero(C1);
1614 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1615 !IsCANDI) {
1616 unsigned SrliOpc = RISCV::SRLI;
1617 // If the input is zexti32 we should use SRLIW.
1618 if (X.getOpcode() == ISD::AND &&
1619 isa<ConstantSDNode>(X.getOperand(1)) &&
1620 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1621 SrliOpc = RISCV::SRLIW;
1622 X = X.getOperand(0);
1623 }
1624 SDNode *SRLI = CurDAG->getMachineNode(
1625 SrliOpc, DL, VT, X,
1626 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1627 SDNode *SLLI = CurDAG->getMachineNode(
1628 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1629 CurDAG->getTargetConstant(Trailing, DL, VT));
1630 ReplaceNode(Node, SLLI);
1631 return;
1632 }
1633 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1634 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1635 OneUseOrZExtW && !IsCANDI) {
1636 SDNode *SRLIW = CurDAG->getMachineNode(
1637 RISCV::SRLIW, DL, VT, X,
1638 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1639 SDNode *SLLI = CurDAG->getMachineNode(
1640 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1641 CurDAG->getTargetConstant(Trailing, DL, VT));
1642 ReplaceNode(Node, SLLI);
1643 return;
1644 }
1645 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1646 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1647 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1648 SDNode *SRLI = CurDAG->getMachineNode(
1649 RISCV::SRLI, DL, VT, X,
1650 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1651 SDNode *SLLI_UW = CurDAG->getMachineNode(
1652 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1653 CurDAG->getTargetConstant(Trailing, DL, VT));
1654 ReplaceNode(Node, SLLI_UW);
1655 return;
1656 }
1657 }
1658
1659 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1660 // shifted mask with no leading zeros and c3 trailing zeros.
1661 if (LeftShift && isShiftedMask_64(C1)) {
1662 unsigned Leading = XLen - llvm::bit_width(C1);
1663 unsigned Trailing = llvm::countr_zero(C1);
1664 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1665 SDNode *SRLI = CurDAG->getMachineNode(
1666 RISCV::SRLI, DL, VT, X,
1667 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1668 SDNode *SLLI = CurDAG->getMachineNode(
1669 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1670 CurDAG->getTargetConstant(Trailing, DL, VT));
1671 ReplaceNode(Node, SLLI);
1672 return;
1673 }
1674 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1675 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1676 SDNode *SRLIW = CurDAG->getMachineNode(
1677 RISCV::SRLIW, DL, VT, X,
1678 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1679 SDNode *SLLI = CurDAG->getMachineNode(
1680 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1681 CurDAG->getTargetConstant(Trailing, DL, VT));
1682 ReplaceNode(Node, SLLI);
1683 return;
1684 }
1685
1686 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1687 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1688 Subtarget->hasStdExtZba()) {
1689 SDNode *SRLI = CurDAG->getMachineNode(
1690 RISCV::SRLI, DL, VT, X,
1691 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1692 SDNode *SLLI_UW = CurDAG->getMachineNode(
1693 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1694 CurDAG->getTargetConstant(Trailing, DL, VT));
1695 ReplaceNode(Node, SLLI_UW);
1696 return;
1697 }
1698 }
1699 }
1700
1701 const uint64_t C1 = N1C->getZExtValue();
1702
1703 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1704 N0.hasOneUse()) {
1705 unsigned C2 = N0.getConstantOperandVal(1);
1706 unsigned XLen = Subtarget->getXLen();
1707 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1708
1709 SDValue X = N0.getOperand(0);
1710
1711 // Prefer SRAIW + ANDI when possible.
1712 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1713 X.getOpcode() == ISD::SHL &&
1714 isa<ConstantSDNode>(X.getOperand(1)) &&
1715 X.getConstantOperandVal(1) == 32;
1716 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1717 // mask with c3 leading zeros and c2 is larger than c3.
1718 if (isMask_64(C1) && !Skip) {
1719 unsigned Leading = XLen - llvm::bit_width(C1);
1720 if (C2 > Leading) {
1721 SDNode *SRAI = CurDAG->getMachineNode(
1722 RISCV::SRAI, DL, VT, X,
1723 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1724 SDNode *SRLI = CurDAG->getMachineNode(
1725 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1726 CurDAG->getTargetConstant(Leading, DL, VT));
1727 ReplaceNode(Node, SRLI);
1728 return;
1729 }
1730 }
1731
1732 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1733 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1734 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1735 if (isShiftedMask_64(C1) && !Skip) {
1736 unsigned Leading = XLen - llvm::bit_width(C1);
1737 unsigned Trailing = llvm::countr_zero(C1);
1738 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1739 SDNode *SRAI = CurDAG->getMachineNode(
1740 RISCV::SRAI, DL, VT, N0.getOperand(0),
1741 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1742 SDNode *SRLI = CurDAG->getMachineNode(
1743 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1744 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1745 SDNode *SLLI = CurDAG->getMachineNode(
1746 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1747 CurDAG->getTargetConstant(Trailing, DL, VT));
1748 ReplaceNode(Node, SLLI);
1749 return;
1750 }
1751 }
1752 }
1753
1754 // If C1 masks off the upper bits only (but can't be formed as an
1755 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1756 // available.
1757 // Transform (and x, C1)
1758 // -> (<bfextract> x, msb, lsb)
1759 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1760 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1761 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1762 const unsigned Msb = llvm::bit_width(C1) - 1;
1763 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1764 return;
1765 }
1766
1768 return;
1769
1770 break;
1771 }
1772 case ISD::MUL: {
1773 // Special case for calculating (mul (and X, C2), C1) where the full product
1774 // fits in XLen bits. We can shift X left by the number of leading zeros in
1775 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1776 // product has XLen trailing zeros, putting it in the output of MULHU. This
1777 // can avoid materializing a constant in a register for C2.
1778
1779 // RHS should be a constant.
1780 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1781 if (!N1C || !N1C->hasOneUse())
1782 break;
1783
1784 // LHS should be an AND with constant.
1785 SDValue N0 = Node->getOperand(0);
1786 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1787 break;
1788
1790
1791 // Constant should be a mask.
1792 if (!isMask_64(C2))
1793 break;
1794
1795 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1796 // multiple users or the constant is a simm12. This prevents inserting a
1797 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1798 // make it more costly to materialize. Otherwise, using a SLLI might allow
1799 // it to be compressed.
1800 bool IsANDIOrZExt =
1801 isInt<12>(C2) ||
1802 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1803 // With XTHeadBb, we can use TH.EXTU.
1804 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1805 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1806 break;
1807 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1808 // the constant is a simm32.
1809 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1810 // With XTHeadBb, we can use TH.EXTU.
1811 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1812 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1813 break;
1814
1815 // We need to shift left the AND input and C1 by a total of XLen bits.
1816
1817 // How far left do we need to shift the AND input?
1818 unsigned XLen = Subtarget->getXLen();
1819 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1820
1821 // The constant gets shifted by the remaining amount unless that would
1822 // shift bits out.
1823 uint64_t C1 = N1C->getZExtValue();
1824 unsigned ConstantShift = XLen - LeadingZeros;
1825 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1826 break;
1827
1828 uint64_t ShiftedC1 = C1 << ConstantShift;
1829 // If this RV32, we need to sign extend the constant.
1830 if (XLen == 32)
1831 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1832
1833 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1834 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1835 SDNode *SLLI =
1836 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1837 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1838 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1839 SDValue(SLLI, 0), SDValue(Imm, 0));
1840 ReplaceNode(Node, MULHU);
1841 return;
1842 }
1843 case ISD::SMUL_LOHI:
1844 case ISD::UMUL_LOHI:
1845 case RISCVISD::WMULSU: {
1846 // Custom select (S/U)MUL_LOHI to WMUL(U) for RV32P.
1847 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1848 "Unexpected opcode");
1849
1850 unsigned Opc;
1851 switch (Node->getOpcode()) {
1852 default:
1853 llvm_unreachable("Unexpected opcode");
1854 case ISD::SMUL_LOHI:
1855 Opc = RISCV::WMUL;
1856 break;
1857 case ISD::UMUL_LOHI:
1858 Opc = RISCV::WMULU;
1859 break;
1860 case RISCVISD::WMULSU:
1861 Opc = RISCV::WMULSU;
1862 break;
1863 }
1864
1865 SDNode *WMUL = CurDAG->getMachineNode(
1866 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1867
1868 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WMUL, 0));
1869 ReplaceUses(SDValue(Node, 0), Lo);
1870 ReplaceUses(SDValue(Node, 1), Hi);
1871 CurDAG->RemoveDeadNode(Node);
1872 return;
1873 }
1874 case RISCVISD::WSLL:
1875 case RISCVISD::WSLA: {
1876 // Custom select WSLL/WSLA for RV32P.
1877 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1878 "Unexpected opcode");
1879
1880 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1881
1882 SDValue ShAmt = Node->getOperand(1);
1883
1884 unsigned Opc;
1885
1886 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1887 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1888 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1889 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1890 } else {
1891 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1892 }
1893
1894 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1895 Node->getOperand(0), ShAmt);
1896
1897 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1898 ReplaceUses(SDValue(Node, 0), Lo);
1899 ReplaceUses(SDValue(Node, 1), Hi);
1900 CurDAG->RemoveDeadNode(Node);
1901 return;
1902 }
1903 case ISD::LOAD: {
1904 if (tryIndexedLoad(Node))
1905 return;
1906
1907 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1908 // We match post-incrementing load here
1910 if (Load->getAddressingMode() != ISD::POST_INC)
1911 break;
1912
1913 SDValue Chain = Node->getOperand(0);
1914 SDValue Base = Node->getOperand(1);
1915 SDValue Offset = Node->getOperand(2);
1916
1917 bool Simm12 = false;
1918 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1919
1920 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1921 int ConstantVal = ConstantOffset->getSExtValue();
1922 Simm12 = isInt<12>(ConstantVal);
1923 if (Simm12)
1924 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1925 Offset.getValueType());
1926 }
1927
1928 unsigned Opcode = 0;
1929 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1930 case MVT::i8:
1931 if (Simm12 && SignExtend)
1932 Opcode = RISCV::CV_LB_ri_inc;
1933 else if (Simm12 && !SignExtend)
1934 Opcode = RISCV::CV_LBU_ri_inc;
1935 else if (!Simm12 && SignExtend)
1936 Opcode = RISCV::CV_LB_rr_inc;
1937 else
1938 Opcode = RISCV::CV_LBU_rr_inc;
1939 break;
1940 case MVT::i16:
1941 if (Simm12 && SignExtend)
1942 Opcode = RISCV::CV_LH_ri_inc;
1943 else if (Simm12 && !SignExtend)
1944 Opcode = RISCV::CV_LHU_ri_inc;
1945 else if (!Simm12 && SignExtend)
1946 Opcode = RISCV::CV_LH_rr_inc;
1947 else
1948 Opcode = RISCV::CV_LHU_rr_inc;
1949 break;
1950 case MVT::i32:
1951 if (Simm12)
1952 Opcode = RISCV::CV_LW_ri_inc;
1953 else
1954 Opcode = RISCV::CV_LW_rr_inc;
1955 break;
1956 default:
1957 break;
1958 }
1959 if (!Opcode)
1960 break;
1961
1962 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1963 Chain.getSimpleValueType(), Base,
1964 Offset, Chain));
1965 return;
1966 }
1967 break;
1968 }
1969 case RISCVISD::LD_RV32: {
1970 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1971
1973 SDValue Chain = Node->getOperand(0);
1974 SDValue Addr = Node->getOperand(1);
1976
1977 SDValue Ops[] = {Base, Offset, Chain};
1978 MachineSDNode *New = CurDAG->getMachineNode(
1979 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1980 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
1981 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1982 ReplaceUses(SDValue(Node, 0), Lo);
1983 ReplaceUses(SDValue(Node, 1), Hi);
1984 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1985 CurDAG->RemoveDeadNode(Node);
1986 return;
1987 }
1988 case RISCVISD::SD_RV32: {
1990 SDValue Chain = Node->getOperand(0);
1991 SDValue Addr = Node->getOperand(3);
1993
1994 SDValue Lo = Node->getOperand(1);
1995 SDValue Hi = Node->getOperand(2);
1996
1997 SDValue RegPair;
1998 // Peephole to use X0_Pair for storing zero.
2000 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2001 } else {
2002 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2003 }
2004
2005 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2006 {RegPair, Base, Offset, Chain});
2007 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2008 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2009 CurDAG->RemoveDeadNode(Node);
2010 return;
2011 }
2012 case RISCVISD::ADDD:
2013 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2014 // widening multiply.
2016 return;
2017
2018 // Fall through to regular ADDD selection.
2019 [[fallthrough]];
2020 case RISCVISD::SUBD:
2021 case RISCVISD::PPAIRE_DB:
2022 case RISCVISD::WADDAU:
2023 case RISCVISD::WSUBAU: {
2024 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2025 assert(
2026 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2027 "Unexpected opcode");
2028
2029 SDValue Op0Lo = Node->getOperand(0);
2030 SDValue Op0Hi = Node->getOperand(1);
2031
2032 SDValue Op0;
2033 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2034 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2035 } else {
2036 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2037 }
2038
2039 SDValue Op1Lo = Node->getOperand(2);
2040 SDValue Op1Hi = Node->getOperand(3);
2041
2042 MachineSDNode *New;
2043 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2044 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2045 // the two 32-bit values.
2046 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2047 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2048 } else {
2049 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2050
2051 unsigned Opc;
2052 switch (Opcode) {
2053 default:
2054 llvm_unreachable("Unexpected opcode");
2055 case RISCVISD::ADDD:
2056 Opc = RISCV::ADDD;
2057 break;
2058 case RISCVISD::SUBD:
2059 Opc = RISCV::SUBD;
2060 break;
2061 case RISCVISD::PPAIRE_DB:
2062 Opc = RISCV::PPAIRE_DB;
2063 break;
2064 }
2065 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2066 }
2067
2068 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2069 ReplaceUses(SDValue(Node, 0), Lo);
2070 ReplaceUses(SDValue(Node, 1), Hi);
2071 CurDAG->RemoveDeadNode(Node);
2072 return;
2073 }
2075 unsigned IntNo = Node->getConstantOperandVal(0);
2076 switch (IntNo) {
2077 // By default we do not custom select any intrinsic.
2078 default:
2079 break;
2080 case Intrinsic::riscv_vmsgeu:
2081 case Intrinsic::riscv_vmsge: {
2082 SDValue Src1 = Node->getOperand(1);
2083 SDValue Src2 = Node->getOperand(2);
2084 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2085 bool IsCmpConstant = false;
2086 bool IsCmpMinimum = false;
2087 // Only custom select scalar second operand.
2088 if (Src2.getValueType() != XLenVT)
2089 break;
2090 // Small constants are handled with patterns.
2091 int64_t CVal = 0;
2092 MVT Src1VT = Src1.getSimpleValueType();
2093 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2094 IsCmpConstant = true;
2095 CVal = C->getSExtValue();
2096 if (CVal >= -15 && CVal <= 16) {
2097 if (!IsUnsigned || CVal != 0)
2098 break;
2099 IsCmpMinimum = true;
2100 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2101 Src1VT.getScalarSizeInBits())
2102 .getSExtValue()) {
2103 IsCmpMinimum = true;
2104 }
2105 }
2106 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2107 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2108 default:
2109 llvm_unreachable("Unexpected LMUL!");
2110#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2111 case RISCVVType::lmulenum: \
2112 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2113 : RISCV::PseudoVMSLT_VX_##suffix; \
2114 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2115 : RISCV::PseudoVMSGT_VX_##suffix; \
2116 break;
2117 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2118 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2119 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2120 CASE_VMSLT_OPCODES(LMUL_1, M1)
2121 CASE_VMSLT_OPCODES(LMUL_2, M2)
2122 CASE_VMSLT_OPCODES(LMUL_4, M4)
2123 CASE_VMSLT_OPCODES(LMUL_8, M8)
2124#undef CASE_VMSLT_OPCODES
2125 }
2126 // Mask operations use the LMUL from the mask type.
2127 switch (RISCVTargetLowering::getLMUL(VT)) {
2128 default:
2129 llvm_unreachable("Unexpected LMUL!");
2130#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2131 case RISCVVType::lmulenum: \
2132 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2133 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2134 break;
2135 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2136 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2137 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2138 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2139 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2140 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2141 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2142#undef CASE_VMNAND_VMSET_OPCODES
2143 }
2144 SDValue SEW = CurDAG->getTargetConstant(
2145 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2146 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2147 SDValue VL;
2148 selectVLOp(Node->getOperand(3), VL);
2149
2150 // If vmsge(u) with minimum value, expand it to vmset.
2151 if (IsCmpMinimum) {
2153 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2154 return;
2155 }
2156
2157 if (IsCmpConstant) {
2158 SDValue Imm =
2159 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2160
2161 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2162 {Src1, Imm, VL, SEW}));
2163 return;
2164 }
2165
2166 // Expand to
2167 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2168 SDValue Cmp = SDValue(
2169 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2170 0);
2171 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2172 {Cmp, Cmp, VL, MaskSEW}));
2173 return;
2174 }
2175 case Intrinsic::riscv_vmsgeu_mask:
2176 case Intrinsic::riscv_vmsge_mask: {
2177 SDValue Src1 = Node->getOperand(2);
2178 SDValue Src2 = Node->getOperand(3);
2179 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2180 bool IsCmpConstant = false;
2181 bool IsCmpMinimum = false;
2182 // Only custom select scalar second operand.
2183 if (Src2.getValueType() != XLenVT)
2184 break;
2185 // Small constants are handled with patterns.
2186 MVT Src1VT = Src1.getSimpleValueType();
2187 int64_t CVal = 0;
2188 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2189 IsCmpConstant = true;
2190 CVal = C->getSExtValue();
2191 if (CVal >= -15 && CVal <= 16) {
2192 if (!IsUnsigned || CVal != 0)
2193 break;
2194 IsCmpMinimum = true;
2195 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2196 Src1VT.getScalarSizeInBits())
2197 .getSExtValue()) {
2198 IsCmpMinimum = true;
2199 }
2200 }
2201 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2202 VMOROpcode, VMSGTMaskOpcode;
2203 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2204 default:
2205 llvm_unreachable("Unexpected LMUL!");
2206#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2207 case RISCVVType::lmulenum: \
2208 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2209 : RISCV::PseudoVMSLT_VX_##suffix; \
2210 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2211 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2212 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2213 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2214 break;
2215 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2216 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2217 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2218 CASE_VMSLT_OPCODES(LMUL_1, M1)
2219 CASE_VMSLT_OPCODES(LMUL_2, M2)
2220 CASE_VMSLT_OPCODES(LMUL_4, M4)
2221 CASE_VMSLT_OPCODES(LMUL_8, M8)
2222#undef CASE_VMSLT_OPCODES
2223 }
2224 // Mask operations use the LMUL from the mask type.
2225 switch (RISCVTargetLowering::getLMUL(VT)) {
2226 default:
2227 llvm_unreachable("Unexpected LMUL!");
2228#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2229 case RISCVVType::lmulenum: \
2230 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2231 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2232 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2233 break;
2234 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2235 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2236 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2241#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2242 }
2243 SDValue SEW = CurDAG->getTargetConstant(
2244 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2245 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2246 SDValue VL;
2247 selectVLOp(Node->getOperand(5), VL);
2248 SDValue MaskedOff = Node->getOperand(1);
2249 SDValue Mask = Node->getOperand(4);
2250
2251 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2252 if (IsCmpMinimum) {
2253 // We don't need vmor if the MaskedOff and the Mask are the same
2254 // value.
2255 if (Mask == MaskedOff) {
2256 ReplaceUses(Node, Mask.getNode());
2257 return;
2258 }
2260 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2261 {Mask, MaskedOff, VL, MaskSEW}));
2262 return;
2263 }
2264
2265 // If the MaskedOff value and the Mask are the same value use
2266 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2267 // This avoids needing to copy v0 to vd before starting the next sequence.
2268 if (Mask == MaskedOff) {
2269 SDValue Cmp = SDValue(
2270 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2271 0);
2272 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2273 {Mask, Cmp, VL, MaskSEW}));
2274 return;
2275 }
2276
2277 SDValue PolicyOp =
2278 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2279
2280 if (IsCmpConstant) {
2281 SDValue Imm =
2282 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2283
2284 ReplaceNode(Node, CurDAG->getMachineNode(
2285 VMSGTMaskOpcode, DL, VT,
2286 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2287 return;
2288 }
2289
2290 // Otherwise use
2291 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2292 // The result is mask undisturbed.
2293 // We use the same instructions to emulate mask agnostic behavior, because
2294 // the agnostic result can be either undisturbed or all 1.
2295 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2296 {MaskedOff, Src1, Src2, Mask,
2297 VL, SEW, PolicyOp}),
2298 0);
2299 // vmxor.mm vd, vd, v0 is used to update active value.
2300 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2301 {Cmp, Mask, VL, MaskSEW}));
2302 return;
2303 }
2304 case Intrinsic::riscv_vsetvli:
2305 case Intrinsic::riscv_vsetvlimax:
2306 return selectVSETVLI(Node);
2307 case Intrinsic::riscv_sf_vsettnt:
2308 case Intrinsic::riscv_sf_vsettm:
2309 case Intrinsic::riscv_sf_vsettk:
2310 return selectXSfmmVSET(Node);
2311 }
2312 break;
2313 }
2315 unsigned IntNo = Node->getConstantOperandVal(1);
2316 switch (IntNo) {
2317 // By default we do not custom select any intrinsic.
2318 default:
2319 break;
2320 case Intrinsic::riscv_vlseg2:
2321 case Intrinsic::riscv_vlseg3:
2322 case Intrinsic::riscv_vlseg4:
2323 case Intrinsic::riscv_vlseg5:
2324 case Intrinsic::riscv_vlseg6:
2325 case Intrinsic::riscv_vlseg7:
2326 case Intrinsic::riscv_vlseg8: {
2327 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2328 /*IsStrided*/ false);
2329 return;
2330 }
2331 case Intrinsic::riscv_vlseg2_mask:
2332 case Intrinsic::riscv_vlseg3_mask:
2333 case Intrinsic::riscv_vlseg4_mask:
2334 case Intrinsic::riscv_vlseg5_mask:
2335 case Intrinsic::riscv_vlseg6_mask:
2336 case Intrinsic::riscv_vlseg7_mask:
2337 case Intrinsic::riscv_vlseg8_mask: {
2338 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2339 /*IsStrided*/ false);
2340 return;
2341 }
2342 case Intrinsic::riscv_vlsseg2:
2343 case Intrinsic::riscv_vlsseg3:
2344 case Intrinsic::riscv_vlsseg4:
2345 case Intrinsic::riscv_vlsseg5:
2346 case Intrinsic::riscv_vlsseg6:
2347 case Intrinsic::riscv_vlsseg7:
2348 case Intrinsic::riscv_vlsseg8: {
2349 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2350 /*IsStrided*/ true);
2351 return;
2352 }
2353 case Intrinsic::riscv_vlsseg2_mask:
2354 case Intrinsic::riscv_vlsseg3_mask:
2355 case Intrinsic::riscv_vlsseg4_mask:
2356 case Intrinsic::riscv_vlsseg5_mask:
2357 case Intrinsic::riscv_vlsseg6_mask:
2358 case Intrinsic::riscv_vlsseg7_mask:
2359 case Intrinsic::riscv_vlsseg8_mask: {
2360 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2361 /*IsStrided*/ true);
2362 return;
2363 }
2364 case Intrinsic::riscv_vloxseg2:
2365 case Intrinsic::riscv_vloxseg3:
2366 case Intrinsic::riscv_vloxseg4:
2367 case Intrinsic::riscv_vloxseg5:
2368 case Intrinsic::riscv_vloxseg6:
2369 case Intrinsic::riscv_vloxseg7:
2370 case Intrinsic::riscv_vloxseg8:
2371 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2372 /*IsOrdered*/ true);
2373 return;
2374 case Intrinsic::riscv_vluxseg2:
2375 case Intrinsic::riscv_vluxseg3:
2376 case Intrinsic::riscv_vluxseg4:
2377 case Intrinsic::riscv_vluxseg5:
2378 case Intrinsic::riscv_vluxseg6:
2379 case Intrinsic::riscv_vluxseg7:
2380 case Intrinsic::riscv_vluxseg8:
2381 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2382 /*IsOrdered*/ false);
2383 return;
2384 case Intrinsic::riscv_vloxseg2_mask:
2385 case Intrinsic::riscv_vloxseg3_mask:
2386 case Intrinsic::riscv_vloxseg4_mask:
2387 case Intrinsic::riscv_vloxseg5_mask:
2388 case Intrinsic::riscv_vloxseg6_mask:
2389 case Intrinsic::riscv_vloxseg7_mask:
2390 case Intrinsic::riscv_vloxseg8_mask:
2391 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2392 /*IsOrdered*/ true);
2393 return;
2394 case Intrinsic::riscv_vluxseg2_mask:
2395 case Intrinsic::riscv_vluxseg3_mask:
2396 case Intrinsic::riscv_vluxseg4_mask:
2397 case Intrinsic::riscv_vluxseg5_mask:
2398 case Intrinsic::riscv_vluxseg6_mask:
2399 case Intrinsic::riscv_vluxseg7_mask:
2400 case Intrinsic::riscv_vluxseg8_mask:
2401 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2402 /*IsOrdered*/ false);
2403 return;
2404 case Intrinsic::riscv_vlseg8ff:
2405 case Intrinsic::riscv_vlseg7ff:
2406 case Intrinsic::riscv_vlseg6ff:
2407 case Intrinsic::riscv_vlseg5ff:
2408 case Intrinsic::riscv_vlseg4ff:
2409 case Intrinsic::riscv_vlseg3ff:
2410 case Intrinsic::riscv_vlseg2ff: {
2411 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2412 return;
2413 }
2414 case Intrinsic::riscv_vlseg8ff_mask:
2415 case Intrinsic::riscv_vlseg7ff_mask:
2416 case Intrinsic::riscv_vlseg6ff_mask:
2417 case Intrinsic::riscv_vlseg5ff_mask:
2418 case Intrinsic::riscv_vlseg4ff_mask:
2419 case Intrinsic::riscv_vlseg3ff_mask:
2420 case Intrinsic::riscv_vlseg2ff_mask: {
2421 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2422 return;
2423 }
2424 case Intrinsic::riscv_vloxei:
2425 case Intrinsic::riscv_vloxei_mask:
2426 case Intrinsic::riscv_vluxei:
2427 case Intrinsic::riscv_vluxei_mask: {
2428 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2429 IntNo == Intrinsic::riscv_vluxei_mask;
2430 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2431 IntNo == Intrinsic::riscv_vloxei_mask;
2432
2433 MVT VT = Node->getSimpleValueType(0);
2434 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2435
2436 unsigned CurOp = 2;
2437 SmallVector<SDValue, 8> Operands;
2438 Operands.push_back(Node->getOperand(CurOp++));
2439
2440 MVT IndexVT;
2441 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2442 /*IsStridedOrIndexed*/ true, Operands,
2443 /*IsLoad=*/true, &IndexVT);
2444
2446 "Element count mismatch");
2447
2450 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2451 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2452 reportFatalUsageError("The V extension does not support EEW=64 for "
2453 "index values when XLEN=32");
2454 }
2455 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2456 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2457 static_cast<unsigned>(IndexLMUL));
2458 MachineSDNode *Load =
2459 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2460
2461 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2462
2463 ReplaceNode(Node, Load);
2464 return;
2465 }
2466 case Intrinsic::riscv_vlm:
2467 case Intrinsic::riscv_vle:
2468 case Intrinsic::riscv_vle_mask:
2469 case Intrinsic::riscv_vlse:
2470 case Intrinsic::riscv_vlse_mask: {
2471 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2472 IntNo == Intrinsic::riscv_vlse_mask;
2473 bool IsStrided =
2474 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2475
2476 MVT VT = Node->getSimpleValueType(0);
2477 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2478
2479 // The riscv_vlm intrinsics are always tail agnostic and have no passthru
2480 // operand at the IR level. In pseudos, they have both policy and
2481 // passthru operand. The passthru operand is needed to track the
2482 // "tail undefined" state, and the policy is there just
2483 // for consistency - it will always be "don't care" for the
2484 // unmasked form.
2485 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2486 unsigned CurOp = 2;
2487 SmallVector<SDValue, 8> Operands;
2488 if (HasPassthruOperand)
2489 Operands.push_back(Node->getOperand(CurOp++));
2490 else {
2491 // We eagerly lower to implicit_def (instead of undef), as we
2492 // otherwise fail to select nodes such as: nxv1i1 = undef
2493 SDNode *Passthru =
2494 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2495 Operands.push_back(SDValue(Passthru, 0));
2496 }
2497 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2498 Operands, /*IsLoad=*/true);
2499
2501 const RISCV::VLEPseudo *P =
2502 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2503 static_cast<unsigned>(LMUL));
2504 MachineSDNode *Load =
2505 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2506
2507 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2508
2509 ReplaceNode(Node, Load);
2510 return;
2511 }
2512 case Intrinsic::riscv_vleff:
2513 case Intrinsic::riscv_vleff_mask: {
2514 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2515
2516 MVT VT = Node->getSimpleValueType(0);
2517 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2518
2519 unsigned CurOp = 2;
2520 SmallVector<SDValue, 7> Operands;
2521 Operands.push_back(Node->getOperand(CurOp++));
2522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2523 /*IsStridedOrIndexed*/ false, Operands,
2524 /*IsLoad=*/true);
2525
2527 const RISCV::VLEPseudo *P =
2528 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2529 Log2SEW, static_cast<unsigned>(LMUL));
2530 MachineSDNode *Load = CurDAG->getMachineNode(
2531 P->Pseudo, DL, Node->getVTList(), Operands);
2532 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2533
2534 ReplaceNode(Node, Load);
2535 return;
2536 }
2537 case Intrinsic::riscv_nds_vln:
2538 case Intrinsic::riscv_nds_vln_mask:
2539 case Intrinsic::riscv_nds_vlnu:
2540 case Intrinsic::riscv_nds_vlnu_mask: {
2541 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2542 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2543 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2544 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2545
2546 MVT VT = Node->getSimpleValueType(0);
2547 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2548 unsigned CurOp = 2;
2549 SmallVector<SDValue, 8> Operands;
2550
2551 Operands.push_back(Node->getOperand(CurOp++));
2552 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2553 /*IsStridedOrIndexed=*/false, Operands,
2554 /*IsLoad=*/true);
2555
2557 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2558 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2559 MachineSDNode *Load =
2560 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2561
2562 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2563 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2564
2565 ReplaceNode(Node, Load);
2566 return;
2567 }
2568 }
2569 break;
2570 }
2571 case ISD::INTRINSIC_VOID: {
2572 unsigned IntNo = Node->getConstantOperandVal(1);
2573 switch (IntNo) {
2574 case Intrinsic::riscv_vsseg2:
2575 case Intrinsic::riscv_vsseg3:
2576 case Intrinsic::riscv_vsseg4:
2577 case Intrinsic::riscv_vsseg5:
2578 case Intrinsic::riscv_vsseg6:
2579 case Intrinsic::riscv_vsseg7:
2580 case Intrinsic::riscv_vsseg8: {
2581 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2582 /*IsStrided*/ false);
2583 return;
2584 }
2585 case Intrinsic::riscv_vsseg2_mask:
2586 case Intrinsic::riscv_vsseg3_mask:
2587 case Intrinsic::riscv_vsseg4_mask:
2588 case Intrinsic::riscv_vsseg5_mask:
2589 case Intrinsic::riscv_vsseg6_mask:
2590 case Intrinsic::riscv_vsseg7_mask:
2591 case Intrinsic::riscv_vsseg8_mask: {
2592 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2593 /*IsStrided*/ false);
2594 return;
2595 }
2596 case Intrinsic::riscv_vssseg2:
2597 case Intrinsic::riscv_vssseg3:
2598 case Intrinsic::riscv_vssseg4:
2599 case Intrinsic::riscv_vssseg5:
2600 case Intrinsic::riscv_vssseg6:
2601 case Intrinsic::riscv_vssseg7:
2602 case Intrinsic::riscv_vssseg8: {
2603 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2604 /*IsStrided*/ true);
2605 return;
2606 }
2607 case Intrinsic::riscv_vssseg2_mask:
2608 case Intrinsic::riscv_vssseg3_mask:
2609 case Intrinsic::riscv_vssseg4_mask:
2610 case Intrinsic::riscv_vssseg5_mask:
2611 case Intrinsic::riscv_vssseg6_mask:
2612 case Intrinsic::riscv_vssseg7_mask:
2613 case Intrinsic::riscv_vssseg8_mask: {
2614 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2615 /*IsStrided*/ true);
2616 return;
2617 }
2618 case Intrinsic::riscv_vsoxseg2:
2619 case Intrinsic::riscv_vsoxseg3:
2620 case Intrinsic::riscv_vsoxseg4:
2621 case Intrinsic::riscv_vsoxseg5:
2622 case Intrinsic::riscv_vsoxseg6:
2623 case Intrinsic::riscv_vsoxseg7:
2624 case Intrinsic::riscv_vsoxseg8:
2625 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2626 /*IsOrdered*/ true);
2627 return;
2628 case Intrinsic::riscv_vsuxseg2:
2629 case Intrinsic::riscv_vsuxseg3:
2630 case Intrinsic::riscv_vsuxseg4:
2631 case Intrinsic::riscv_vsuxseg5:
2632 case Intrinsic::riscv_vsuxseg6:
2633 case Intrinsic::riscv_vsuxseg7:
2634 case Intrinsic::riscv_vsuxseg8:
2635 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2636 /*IsOrdered*/ false);
2637 return;
2638 case Intrinsic::riscv_vsoxseg2_mask:
2639 case Intrinsic::riscv_vsoxseg3_mask:
2640 case Intrinsic::riscv_vsoxseg4_mask:
2641 case Intrinsic::riscv_vsoxseg5_mask:
2642 case Intrinsic::riscv_vsoxseg6_mask:
2643 case Intrinsic::riscv_vsoxseg7_mask:
2644 case Intrinsic::riscv_vsoxseg8_mask:
2645 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2646 /*IsOrdered*/ true);
2647 return;
2648 case Intrinsic::riscv_vsuxseg2_mask:
2649 case Intrinsic::riscv_vsuxseg3_mask:
2650 case Intrinsic::riscv_vsuxseg4_mask:
2651 case Intrinsic::riscv_vsuxseg5_mask:
2652 case Intrinsic::riscv_vsuxseg6_mask:
2653 case Intrinsic::riscv_vsuxseg7_mask:
2654 case Intrinsic::riscv_vsuxseg8_mask:
2655 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2656 /*IsOrdered*/ false);
2657 return;
2658 case Intrinsic::riscv_vsoxei:
2659 case Intrinsic::riscv_vsoxei_mask:
2660 case Intrinsic::riscv_vsuxei:
2661 case Intrinsic::riscv_vsuxei_mask: {
2662 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2663 IntNo == Intrinsic::riscv_vsuxei_mask;
2664 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2665 IntNo == Intrinsic::riscv_vsoxei_mask;
2666
2667 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2668 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2669
2670 unsigned CurOp = 2;
2671 SmallVector<SDValue, 8> Operands;
2672 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2673
2674 MVT IndexVT;
2675 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2676 /*IsStridedOrIndexed*/ true, Operands,
2677 /*IsLoad=*/false, &IndexVT);
2678
2680 "Element count mismatch");
2681
2684 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2685 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2686 reportFatalUsageError("The V extension does not support EEW=64 for "
2687 "index values when XLEN=32");
2688 }
2689 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2690 IsMasked, IsOrdered, IndexLog2EEW,
2691 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2692 MachineSDNode *Store =
2693 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2694
2695 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2696
2697 ReplaceNode(Node, Store);
2698 return;
2699 }
2700 case Intrinsic::riscv_vsm:
2701 case Intrinsic::riscv_vse:
2702 case Intrinsic::riscv_vse_mask:
2703 case Intrinsic::riscv_vsse:
2704 case Intrinsic::riscv_vsse_mask: {
2705 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2706 IntNo == Intrinsic::riscv_vsse_mask;
2707 bool IsStrided =
2708 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2709
2710 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2711 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2712
2713 unsigned CurOp = 2;
2714 SmallVector<SDValue, 8> Operands;
2715 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2716
2717 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2718 Operands);
2719
2721 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2722 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2723 MachineSDNode *Store =
2724 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2725 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2726
2727 ReplaceNode(Node, Store);
2728 return;
2729 }
2730 case Intrinsic::riscv_sf_vc_x_se:
2731 case Intrinsic::riscv_sf_vc_i_se:
2733 return;
2734 case Intrinsic::riscv_sf_vlte8:
2735 case Intrinsic::riscv_sf_vlte16:
2736 case Intrinsic::riscv_sf_vlte32:
2737 case Intrinsic::riscv_sf_vlte64: {
2738 unsigned Log2SEW;
2739 unsigned PseudoInst;
2740 switch (IntNo) {
2741 case Intrinsic::riscv_sf_vlte8:
2742 PseudoInst = RISCV::PseudoSF_VLTE8;
2743 Log2SEW = 3;
2744 break;
2745 case Intrinsic::riscv_sf_vlte16:
2746 PseudoInst = RISCV::PseudoSF_VLTE16;
2747 Log2SEW = 4;
2748 break;
2749 case Intrinsic::riscv_sf_vlte32:
2750 PseudoInst = RISCV::PseudoSF_VLTE32;
2751 Log2SEW = 5;
2752 break;
2753 case Intrinsic::riscv_sf_vlte64:
2754 PseudoInst = RISCV::PseudoSF_VLTE64;
2755 Log2SEW = 6;
2756 break;
2757 }
2758
2759 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2760 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2761 SDValue Operands[] = {Node->getOperand(2),
2762 Node->getOperand(3),
2763 Node->getOperand(4),
2764 SEWOp,
2765 TWidenOp,
2766 Node->getOperand(0)};
2767
2768 MachineSDNode *TileLoad =
2769 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2770 CurDAG->setNodeMemRefs(TileLoad,
2771 {cast<MemSDNode>(Node)->getMemOperand()});
2772
2773 ReplaceNode(Node, TileLoad);
2774 return;
2775 }
2776 case Intrinsic::riscv_sf_mm_s_s:
2777 case Intrinsic::riscv_sf_mm_s_u:
2778 case Intrinsic::riscv_sf_mm_u_s:
2779 case Intrinsic::riscv_sf_mm_u_u:
2780 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2781 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2782 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2783 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2784 case Intrinsic::riscv_sf_mm_f_f: {
2785 bool HasFRM = false;
2786 unsigned PseudoInst;
2787 switch (IntNo) {
2788 case Intrinsic::riscv_sf_mm_s_s:
2789 PseudoInst = RISCV::PseudoSF_MM_S_S;
2790 break;
2791 case Intrinsic::riscv_sf_mm_s_u:
2792 PseudoInst = RISCV::PseudoSF_MM_S_U;
2793 break;
2794 case Intrinsic::riscv_sf_mm_u_s:
2795 PseudoInst = RISCV::PseudoSF_MM_U_S;
2796 break;
2797 case Intrinsic::riscv_sf_mm_u_u:
2798 PseudoInst = RISCV::PseudoSF_MM_U_U;
2799 break;
2800 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2801 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2802 HasFRM = true;
2803 break;
2804 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2805 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2806 HasFRM = true;
2807 break;
2808 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2809 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2810 HasFRM = true;
2811 break;
2812 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2813 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2814 HasFRM = true;
2815 break;
2816 case Intrinsic::riscv_sf_mm_f_f:
2817 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2818 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2819 else
2820 PseudoInst = RISCV::PseudoSF_MM_F_F;
2821 HasFRM = true;
2822 break;
2823 }
2824 uint64_t TileNum = Node->getConstantOperandVal(2);
2825 SDValue Op1 = Node->getOperand(3);
2826 SDValue Op2 = Node->getOperand(4);
2827 MVT VT = Op1->getSimpleValueType(0);
2828 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2829 SDValue TmOp = Node->getOperand(5);
2830 SDValue TnOp = Node->getOperand(6);
2831 SDValue TkOp = Node->getOperand(7);
2832 SDValue TWidenOp = Node->getOperand(8);
2833 SDValue Chain = Node->getOperand(0);
2834
2835 // sf.mm.f.f with sew=32, twiden=2 is invalid
2836 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2837 TWidenOp->getAsZExtVal() == 2)
2838 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2839
2840 SmallVector<SDValue, 10> Operands(
2841 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2842 if (HasFRM)
2843 Operands.push_back(
2844 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2845 Operands.append({TmOp, TnOp, TkOp,
2846 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2847 Chain});
2848
2849 auto *NewNode =
2850 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2851
2852 ReplaceNode(Node, NewNode);
2853 return;
2854 }
2855 case Intrinsic::riscv_sf_vtzero_t: {
2856 uint64_t TileNum = Node->getConstantOperandVal(2);
2857 SDValue Tm = Node->getOperand(3);
2858 SDValue Tn = Node->getOperand(4);
2859 SDValue Log2SEW = Node->getOperand(5);
2860 SDValue TWiden = Node->getOperand(6);
2861 SDValue Chain = Node->getOperand(0);
2862 auto *NewNode = CurDAG->getMachineNode(
2863 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2864 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2865 TWiden, Chain});
2866
2867 ReplaceNode(Node, NewNode);
2868 return;
2869 }
2870 }
2871 break;
2872 }
2873 case ISD::BITCAST: {
2874 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2875 // Just drop bitcasts between vectors if both are fixed or both are
2876 // scalable.
2877 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2878 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2879 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2880 CurDAG->RemoveDeadNode(Node);
2881 return;
2882 }
2883 if (Subtarget->hasStdExtP()) {
2884 bool Is32BitCast =
2885 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2886 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2887 bool Is64BitCast =
2888 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2889 SrcVT == MVT::v2i32)) ||
2890 (SrcVT == MVT::i64 &&
2891 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2892 if (Is32BitCast || Is64BitCast) {
2893 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2894 CurDAG->RemoveDeadNode(Node);
2895 return;
2896 }
2897 }
2898 break;
2899 }
2900 case ISD::SPLAT_VECTOR: {
2901 if (!Subtarget->hasStdExtP())
2902 break;
2903 auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0));
2904 if (!ConstNode)
2905 break;
2906
2907 if (ConstNode->isZero()) {
2908 SDValue New =
2909 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
2910 ReplaceNode(Node, New.getNode());
2911 return;
2912 }
2913
2914 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2915 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2916
2917 // Find the smallest splat.
2918 if (Val.getBitWidth() > 16 && Val.isSplat(16))
2919 Val = Val.trunc(16);
2920 if (Val.getBitWidth() > 8 && Val.isSplat(8))
2921 Val = Val.trunc(8);
2922
2923 EltSize = Val.getBitWidth();
2924 int64_t Imm = Val.getSExtValue();
2925
2926 unsigned Opc = 0;
2927 if (EltSize == 8) {
2928 Opc = RISCV::PLI_B;
2929 } else if (isInt<10>(Imm)) {
2930 Opc = EltSize == 32 ? RISCV::PLI_W : RISCV::PLI_H;
2931 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
2932 Opc = RISCV::PLUI_H;
2933 Imm = Imm >> 6;
2934 } else if (EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
2935 Opc = RISCV::PLUI_W;
2936 Imm = Imm >> 22;
2937 }
2938
2939 if (Opc) {
2940 SDNode *NewNode = CurDAG->getMachineNode(
2941 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
2942 ReplaceNode(Node, NewNode);
2943 return;
2944 }
2945
2946 break;
2947 }
2949 if (Subtarget->hasStdExtP()) {
2950 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2951 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2952 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2953 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2954 CurDAG->RemoveDeadNode(Node);
2955 return;
2956 }
2957 }
2958 break;
2960 case RISCVISD::TUPLE_INSERT: {
2961 SDValue V = Node->getOperand(0);
2962 SDValue SubV = Node->getOperand(1);
2963 SDLoc DL(SubV);
2964 auto Idx = Node->getConstantOperandVal(2);
2965 MVT SubVecVT = SubV.getSimpleValueType();
2966
2967 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2968 MVT SubVecContainerVT = SubVecVT;
2969 // Establish the correct scalable-vector types for any fixed-length type.
2970 if (SubVecVT.isFixedLengthVector()) {
2971 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2973 [[maybe_unused]] bool ExactlyVecRegSized =
2974 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2975 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2976 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2977 .getKnownMinValue()));
2978 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2979 }
2980 MVT ContainerVT = VT;
2981 if (VT.isFixedLengthVector())
2982 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2983
2984 const auto *TRI = Subtarget->getRegisterInfo();
2985 unsigned SubRegIdx;
2986 std::tie(SubRegIdx, Idx) =
2988 ContainerVT, SubVecContainerVT, Idx, TRI);
2989
2990 // If the Idx hasn't been completely eliminated then this is a subvector
2991 // insert which doesn't naturally align to a vector register. These must
2992 // be handled using instructions to manipulate the vector registers.
2993 if (Idx != 0)
2994 break;
2995
2996 RISCVVType::VLMUL SubVecLMUL =
2997 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2998 [[maybe_unused]] bool IsSubVecPartReg =
2999 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3000 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3001 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3002 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3003 V.isUndef()) &&
3004 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3005 "the subvector is smaller than a full-sized register");
3006
3007 // If we haven't set a SubRegIdx, then we must be going between
3008 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3009 if (SubRegIdx == RISCV::NoSubRegister) {
3010 unsigned InRegClassID =
3013 InRegClassID &&
3014 "Unexpected subvector extraction");
3015 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3016 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3017 DL, VT, SubV, RC);
3018 ReplaceNode(Node, NewNode);
3019 return;
3020 }
3021
3022 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3023 ReplaceNode(Node, Insert.getNode());
3024 return;
3025 }
3027 case RISCVISD::TUPLE_EXTRACT: {
3028 SDValue V = Node->getOperand(0);
3029 auto Idx = Node->getConstantOperandVal(1);
3030 MVT InVT = V.getSimpleValueType();
3031 SDLoc DL(V);
3032
3033 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3034 MVT SubVecContainerVT = VT;
3035 // Establish the correct scalable-vector types for any fixed-length type.
3036 if (VT.isFixedLengthVector()) {
3037 assert(Idx == 0);
3038 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3039 }
3040 if (InVT.isFixedLengthVector())
3041 InVT = TLI.getContainerForFixedLengthVector(InVT);
3042
3043 const auto *TRI = Subtarget->getRegisterInfo();
3044 unsigned SubRegIdx;
3045 std::tie(SubRegIdx, Idx) =
3047 InVT, SubVecContainerVT, Idx, TRI);
3048
3049 // If the Idx hasn't been completely eliminated then this is a subvector
3050 // extract which doesn't naturally align to a vector register. These must
3051 // be handled using instructions to manipulate the vector registers.
3052 if (Idx != 0)
3053 break;
3054
3055 // If we haven't set a SubRegIdx, then we must be going between
3056 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3057 if (SubRegIdx == RISCV::NoSubRegister) {
3058 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3060 InRegClassID &&
3061 "Unexpected subvector extraction");
3062 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3063 SDNode *NewNode =
3064 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3065 ReplaceNode(Node, NewNode);
3066 return;
3067 }
3068
3069 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3070 ReplaceNode(Node, Extract.getNode());
3071 return;
3072 }
3073 case RISCVISD::VMV_S_X_VL:
3074 case RISCVISD::VFMV_S_F_VL:
3075 case RISCVISD::VMV_V_X_VL:
3076 case RISCVISD::VFMV_V_F_VL: {
3077 // Try to match splat of a scalar load to a strided load with stride of x0.
3078 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3079 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3080 if (!Node->getOperand(0).isUndef())
3081 break;
3082 SDValue Src = Node->getOperand(1);
3083 auto *Ld = dyn_cast<LoadSDNode>(Src);
3084 // Can't fold load update node because the second
3085 // output is used so that load update node can't be removed.
3086 if (!Ld || Ld->isIndexed())
3087 break;
3088 EVT MemVT = Ld->getMemoryVT();
3089 // The memory VT should be the same size as the element type.
3090 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3091 break;
3092 if (!IsProfitableToFold(Src, Node, Node) ||
3093 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3094 break;
3095
3096 SDValue VL;
3097 if (IsScalarMove) {
3098 // We could deal with more VL if we update the VSETVLI insert pass to
3099 // avoid introducing more VSETVLI.
3100 if (!isOneConstant(Node->getOperand(2)))
3101 break;
3102 selectVLOp(Node->getOperand(2), VL);
3103 } else
3104 selectVLOp(Node->getOperand(2), VL);
3105
3106 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3107 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3108
3109 // If VL=1, then we don't need to do a strided load and can just do a
3110 // regular load.
3111 bool IsStrided = !isOneConstant(VL);
3112
3113 // Only do a strided load if we have optimized zero-stride vector load.
3114 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3115 break;
3116
3117 SmallVector<SDValue> Operands = {
3118 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3119 Ld->getBasePtr()};
3120 if (IsStrided)
3121 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3123 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3124 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3125
3127 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3128 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3129 Log2SEW, static_cast<unsigned>(LMUL));
3130 MachineSDNode *Load =
3131 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3132 // Update the chain.
3133 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3134 // Record the mem-refs
3135 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3136 // Replace the splat with the vlse.
3137 ReplaceNode(Node, Load);
3138 return;
3139 }
3140 case ISD::PREFETCH:
3141 // MIPS's prefetch instruction already encodes the hint within the
3142 // instruction itself, so no extra NTL hint is needed.
3143 if (Subtarget->hasVendorXMIPSCBOP())
3144 break;
3145
3146 unsigned Locality = Node->getConstantOperandVal(3);
3147 if (Locality > 2)
3148 break;
3149
3150 auto *LoadStoreMem = cast<MemSDNode>(Node);
3151 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3153
3154 int NontemporalLevel = 0;
3155 switch (Locality) {
3156 case 0:
3157 NontemporalLevel = 3; // NTL.ALL
3158 break;
3159 case 1:
3160 NontemporalLevel = 1; // NTL.PALL
3161 break;
3162 case 2:
3163 NontemporalLevel = 0; // NTL.P1
3164 break;
3165 default:
3166 llvm_unreachable("unexpected locality value.");
3167 }
3168
3169 if (NontemporalLevel & 0b1)
3171 if (NontemporalLevel & 0b10)
3173 break;
3174 }
3175
3176 // Select the default instruction.
3177 SelectCode(Node);
3178}
3179
3181 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3182 std::vector<SDValue> &OutOps) {
3183 // Always produce a register and immediate operand, as expected by
3184 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3185 switch (ConstraintID) {
3188 SDValue Op0, Op1;
3189 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3190 assert(Found && "SelectAddrRegImm should always succeed");
3191 OutOps.push_back(Op0);
3192 OutOps.push_back(Op1);
3193 return false;
3194 }
3196 OutOps.push_back(Op);
3197 OutOps.push_back(
3198 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3199 return false;
3200 default:
3201 report_fatal_error("Unexpected asm memory constraint " +
3202 InlineAsm::getMemConstraintName(ConstraintID));
3203 }
3204
3205 return true;
3206}
3207
3209 SDValue &Offset) {
3210 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3211 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3212 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3213 return true;
3214 }
3215
3216 return false;
3217}
3218
3219// Fold constant addresses.
3220static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3221 const MVT VT, const RISCVSubtarget *Subtarget,
3223 bool IsPrefetch = false) {
3224 if (!isa<ConstantSDNode>(Addr))
3225 return false;
3226
3227 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3228
3229 // If the constant is a simm12, we can fold the whole constant and use X0 as
3230 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3231 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3232 int64_t Lo12 = SignExtend64<12>(CVal);
3233 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3234 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3235 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3236 return false;
3237 if (Hi) {
3238 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3239 Base = SDValue(
3240 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3241 CurDAG->getTargetConstant(Hi20, DL, VT)),
3242 0);
3243 } else {
3244 Base = CurDAG->getRegister(RISCV::X0, VT);
3245 }
3246 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3247 return true;
3248 }
3249
3250 // Ask how constant materialization would handle this constant.
3251 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3252
3253 // If the last instruction would be an ADDI, we can fold its immediate and
3254 // emit the rest of the sequence as the base.
3255 if (Seq.back().getOpcode() != RISCV::ADDI)
3256 return false;
3257 Lo12 = Seq.back().getImm();
3258 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3259 return false;
3260
3261 // Drop the last instruction.
3262 Seq.pop_back();
3263 assert(!Seq.empty() && "Expected more instructions in sequence");
3264
3265 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3266 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3267 return true;
3268}
3269
3270// Is this ADD instruction only used as the base pointer of scalar loads and
3271// stores?
3273 for (auto *User : Add->users()) {
3274 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3275 User->getOpcode() != RISCVISD::LD_RV32 &&
3276 User->getOpcode() != RISCVISD::SD_RV32 &&
3277 User->getOpcode() != ISD::ATOMIC_LOAD &&
3278 User->getOpcode() != ISD::ATOMIC_STORE)
3279 return false;
3280 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3281 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3282 VT != MVT::f64)
3283 return false;
3284 // Don't allow stores of the value. It must be used as the address.
3285 if (User->getOpcode() == ISD::STORE &&
3286 cast<StoreSDNode>(User)->getValue() == Add)
3287 return false;
3288 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3289 cast<AtomicSDNode>(User)->getVal() == Add)
3290 return false;
3291 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3292 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3293 return false;
3294 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3295 return false;
3296 }
3297
3298 return true;
3299}
3300
3302 switch (User->getOpcode()) {
3303 default:
3304 return false;
3305 case ISD::LOAD:
3306 case RISCVISD::LD_RV32:
3307 case ISD::ATOMIC_LOAD:
3308 break;
3309 case ISD::STORE:
3310 // Don't allow stores of Add. It must only be used as the address.
3311 if (cast<StoreSDNode>(User)->getValue() == Add)
3312 return false;
3313 break;
3314 case RISCVISD::SD_RV32:
3315 // Don't allow stores of Add. It must only be used as the address.
3316 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3317 return false;
3318 break;
3319 case ISD::ATOMIC_STORE:
3320 // Don't allow stores of Add. It must only be used as the address.
3321 if (cast<AtomicSDNode>(User)->getVal() == Add)
3322 return false;
3323 break;
3324 }
3325
3326 return true;
3327}
3328
3329// To prevent SelectAddrRegImm from folding offsets that conflict with the
3330// fusion of PseudoMovAddr, check if the offset of every use of a given address
3331// is within the alignment.
3333 Align Alignment) {
3334 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3335 for (auto *User : Addr->users()) {
3336 // If the user is a load or store, then the offset is 0 which is always
3337 // within alignment.
3338 if (isRegImmLoadOrStore(User, Addr))
3339 continue;
3340
3341 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3342 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3343 if (!isInt<12>(CVal) || Alignment <= CVal)
3344 return false;
3345
3346 // Make sure all uses are foldable load/stores.
3347 for (auto *AddUser : User->users())
3348 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3349 return false;
3350
3351 continue;
3352 }
3353
3354 return false;
3355 }
3356
3357 return true;
3358}
3359
3361 SDValue &Offset) {
3362 if (SelectAddrFrameIndex(Addr, Base, Offset))
3363 return true;
3364
3365 SDLoc DL(Addr);
3366 MVT VT = Addr.getSimpleValueType();
3367
3368 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3369 bool CanFold = true;
3370 // Unconditionally fold if operand 1 is not a global address (e.g.
3371 // externsymbol)
3372 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3373 const DataLayout &DL = CurDAG->getDataLayout();
3374 Align Alignment = commonAlignment(
3375 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3376 if (!areOffsetsWithinAlignment(Addr, Alignment))
3377 CanFold = false;
3378 }
3379 if (CanFold) {
3380 Base = Addr.getOperand(0);
3381 Offset = Addr.getOperand(1);
3382 return true;
3383 }
3384 }
3385
3386 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3387 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3388 if (isInt<12>(CVal)) {
3389 Base = Addr.getOperand(0);
3390 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3391 SDValue LoOperand = Base.getOperand(1);
3392 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3393 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3394 // (its low part, really), then we can rely on the alignment of that
3395 // variable to provide a margin of safety before low part can overflow
3396 // the 12 bits of the load/store offset. Check if CVal falls within
3397 // that margin; if so (low part + CVal) can't overflow.
3398 const DataLayout &DL = CurDAG->getDataLayout();
3399 Align Alignment = commonAlignment(
3400 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3401 if ((CVal == 0 || Alignment > CVal) &&
3402 areOffsetsWithinAlignment(Base, Alignment)) {
3403 int64_t CombinedOffset = CVal + GA->getOffset();
3404 Base = Base.getOperand(0);
3405 Offset = CurDAG->getTargetGlobalAddress(
3406 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3407 CombinedOffset, GA->getTargetFlags());
3408 return true;
3409 }
3410 }
3411 }
3412
3413 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3414 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3415 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3416 return true;
3417 }
3418 }
3419
3420 // Handle ADD with large immediates.
3421 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3422 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3423 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3424
3425 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3426 // an ADDI for part of the offset and fold the rest into the load/store.
3427 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3428 if (CVal >= -4096 && CVal <= 4094) {
3429 int64_t Adj = CVal < 0 ? -2048 : 2047;
3430 Base = SDValue(
3431 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3432 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3433 0);
3434 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3435 return true;
3436 }
3437
3438 // For larger immediates, we might be able to save one instruction from
3439 // constant materialization by folding the Lo12 bits of the immediate into
3440 // the address. We should only do this if the ADD is only used by loads and
3441 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3442 // separately with the full materialized immediate creating extra
3443 // instructions.
3444 if (isWorthFoldingAdd(Addr) &&
3445 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3446 Offset, /*IsPrefetch=*/false)) {
3447 // Insert an ADD instruction with the materialized Hi52 bits.
3448 Base = SDValue(
3449 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3450 0);
3451 return true;
3452 }
3453 }
3454
3455 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3456 /*IsPrefetch=*/false))
3457 return true;
3458
3459 Base = Addr;
3460 Offset = CurDAG->getTargetConstant(0, DL, VT);
3461 return true;
3462}
3463
3464/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3466 SDValue &Offset) {
3467 if (SelectAddrFrameIndex(Addr, Base, Offset))
3468 return true;
3469
3470 SDLoc DL(Addr);
3471 MVT VT = Addr.getSimpleValueType();
3472
3473 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3474 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3475 if (isUInt<9>(CVal)) {
3476 Base = Addr.getOperand(0);
3477
3478 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3479 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3480 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3481 return true;
3482 }
3483 }
3484
3485 Base = Addr;
3486 Offset = CurDAG->getTargetConstant(0, DL, VT);
3487 return true;
3488}
3489
3490/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3491/// Offset should be all zeros.
3493 SDValue &Offset) {
3494 if (SelectAddrFrameIndex(Addr, Base, Offset))
3495 return true;
3496
3497 SDLoc DL(Addr);
3498 MVT VT = Addr.getSimpleValueType();
3499
3500 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3501 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3502 if (isInt<12>(CVal)) {
3503 Base = Addr.getOperand(0);
3504
3505 // Early-out if not a valid offset.
3506 if ((CVal & 0b11111) != 0) {
3507 Base = Addr;
3508 Offset = CurDAG->getTargetConstant(0, DL, VT);
3509 return true;
3510 }
3511
3512 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3513 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3514 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3515 return true;
3516 }
3517 }
3518
3519 // Handle ADD with large immediates.
3520 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3521 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3522 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3523
3524 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3525 // one instruction by folding adjustment (-2048 or 2016) into the address.
3526 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3527 int64_t Adj = CVal < 0 ? -2048 : 2016;
3528 int64_t AdjustedOffset = CVal - Adj;
3529 Base =
3530 SDValue(CurDAG->getMachineNode(
3531 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3532 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3533 0);
3534 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3535 return true;
3536 }
3537
3538 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3539 Offset, /*IsPrefetch=*/true)) {
3540 // Insert an ADD instruction with the materialized Hi52 bits.
3541 Base = SDValue(
3542 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3543 0);
3544 return true;
3545 }
3546 }
3547
3548 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3549 /*IsPrefetch=*/true))
3550 return true;
3551
3552 Base = Addr;
3553 Offset = CurDAG->getTargetConstant(0, DL, VT);
3554 return true;
3555}
3556
3557/// Return true if this a load/store that we have a RegRegScale instruction for.
3559 const RISCVSubtarget &Subtarget) {
3560 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3561 return false;
3562 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3563 if (!(VT.isScalarInteger() &&
3564 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3565 !((VT == MVT::f32 || VT == MVT::f64) &&
3566 Subtarget.hasVendorXTHeadFMemIdx()))
3567 return false;
3568 // Don't allow stores of the value. It must be used as the address.
3569 if (User->getOpcode() == ISD::STORE &&
3570 cast<StoreSDNode>(User)->getValue() == Add)
3571 return false;
3572
3573 return true;
3574}
3575
3576/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3577/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3578/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3579/// single addi and we don't have a SHXADD instruction we could use.
3580/// FIXME: May still need to check how many and what kind of users the SHL has.
3582 SDValue Add,
3583 SDValue Shift = SDValue()) {
3584 bool FoundADDI = false;
3585 for (auto *User : Add->users()) {
3586 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3587 continue;
3588
3589 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3590 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3592 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3593 return false;
3594
3595 FoundADDI = true;
3596
3597 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3598 assert(Shift.getOpcode() == ISD::SHL);
3599 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3600 if (Subtarget.hasShlAdd(ShiftAmt))
3601 return false;
3602
3603 // All users of the ADDI should be load/store.
3604 for (auto *ADDIUser : User->users())
3605 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3606 return false;
3607 }
3608
3609 return true;
3610}
3611
3613 unsigned MaxShiftAmount,
3614 SDValue &Base, SDValue &Index,
3615 SDValue &Scale) {
3616 if (Addr.getOpcode() != ISD::ADD)
3617 return false;
3618 SDValue LHS = Addr.getOperand(0);
3619 SDValue RHS = Addr.getOperand(1);
3620
3621 EVT VT = Addr.getSimpleValueType();
3622 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3623 SDValue &Shift) {
3624 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3625 return false;
3626
3627 // Only match shifts by a value in range [0, MaxShiftAmount].
3628 unsigned ShiftAmt = N.getConstantOperandVal(1);
3629 if (ShiftAmt > MaxShiftAmount)
3630 return false;
3631
3632 Index = N.getOperand(0);
3633 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3634 return true;
3635 };
3636
3637 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3638 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3639 if (LHS.getOpcode() == ISD::ADD &&
3640 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3641 isInt<12>(C1->getSExtValue())) {
3642 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3643 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3644 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3645 SDLoc(Addr), VT);
3646 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3647 LHS.getOperand(0), C1Val),
3648 0);
3649 return true;
3650 }
3651
3652 // Add is commutative so we need to check both operands.
3653 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3654 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3655 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3656 SDLoc(Addr), VT);
3657 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3658 LHS.getOperand(1), C1Val),
3659 0);
3660 return true;
3661 }
3662 }
3663
3664 // Don't match add with constants.
3665 // FIXME: Is this profitable for large constants that have 0s in the lower
3666 // 12 bits that we can materialize with LUI?
3667 return false;
3668 }
3669
3670 // Try to match a shift on the RHS.
3671 if (SelectShl(RHS, Index, Scale)) {
3672 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3673 return false;
3674 Base = LHS;
3675 return true;
3676 }
3677
3678 // Try to match a shift on the LHS.
3679 if (SelectShl(LHS, Index, Scale)) {
3680 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3681 return false;
3682 Base = RHS;
3683 return true;
3684 }
3685
3686 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3687 return false;
3688
3689 Base = LHS;
3690 Index = RHS;
3691 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3692 return true;
3693}
3694
3696 unsigned MaxShiftAmount,
3697 unsigned Bits, SDValue &Base,
3698 SDValue &Index,
3699 SDValue &Scale) {
3700 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3701 return false;
3702
3703 if (Index.getOpcode() == ISD::AND) {
3704 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3705 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3706 Index = Index.getOperand(0);
3707 return true;
3708 }
3709 }
3710
3711 return false;
3712}
3713
3715 SDValue &Offset) {
3716 if (Addr.getOpcode() != ISD::ADD)
3717 return false;
3718
3719 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3720 return false;
3721
3722 Base = Addr.getOperand(0);
3723 Offset = Addr.getOperand(1);
3724 return true;
3725}
3726
// NOTE(review): signature line missing in this listing; from the body this is
// selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) — confirm.
// Strips redundant zext/AND from a shift amount (hardware only reads the low
// log2(ShiftWidth) bits) and folds ADD/SUB-of-multiple-of-ShiftWidth forms.
3728 SDValue &ShAmt) {
3729 ShAmt = N;
3730
3731 // Peek through zext.
3732 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3733 ShAmt = ShAmt.getOperand(0);
3734
3735 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3736 // amount. If there is an AND on the shift amount, we can bypass it if it
3737 // doesn't affect any of those bits.
3738 if (ShAmt.getOpcode() == ISD::AND &&
3739 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3740 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3741
3742 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3743 // mask that covers the bits needed to represent all shift amounts.
3744 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3745 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3746
3747 if (ShMask.isSubsetOf(AndMask)) {
3748 ShAmt = ShAmt.getOperand(0);
3749 } else {
3750 // SimplifyDemandedBits may have optimized the mask so try restoring any
3751 // bits that are known zero.
3752 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3753 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3754 return true;
3755 ShAmt = ShAmt.getOperand(0);
3756 }
3757 }
3758
3759 if (ShAmt.getOpcode() == ISD::ADD &&
3760 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3761 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3762 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3763 // to avoid the ADD.
3764 if (Imm != 0 && Imm % ShiftWidth == 0) {
3765 ShAmt = ShAmt.getOperand(0);
3766 return true;
3767 }
3768 } else if (ShAmt.getOpcode() == ISD::SUB &&
3769 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3770 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3771 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3772 // generate a NEG instead of a SUB of a constant.
3773 if (Imm != 0 && Imm % ShiftWidth == 0) {
3774 SDLoc DL(ShAmt);
3775 EVT VT = ShAmt.getValueType();
3776 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3777 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3778 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3779 ShAmt.getOperand(1));
3780 ShAmt = SDValue(Neg, 0);
3781 return true;
3782 }
3783 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3784 // to generate a NOT instead of a SUB of a constant.
3785 if (Imm % ShiftWidth == ShiftWidth - 1) {
3786 SDLoc DL(ShAmt);
3787 EVT VT = ShAmt.getValueType();
3788 MachineSDNode *Not = CurDAG->getMachineNode(
3789 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3790 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3791 ShAmt = SDValue(Not, 0);
3792 return true;
3793 }
3794 }
3795
// Always succeeds: worst case ShAmt is just N unchanged.
3796 return true;
3797}
3798
3799/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3800/// check for equality with 0. This function emits instructions that convert the
3801/// seteq/setne into something that can be compared with 0.
3802/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3803/// ISD::SETNE).
// NOTE(review): the signature line is missing in this listing; from the body
// this is selectSETCC(SDNode *N, ISD::CondCode ExpectedCCVal, SDValue &Val).
3805 SDValue &Val) {
3806 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3807 "Unexpected condition code!");
3808
3809 // We're looking for a setcc.
3810 if (N->getOpcode() != ISD::SETCC)
3811 return false;
3812
3813 // Must be an equality comparison.
3814 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3815 if (CCVal != ExpectedCCVal)
3816 return false;
3817
3818 SDValue LHS = N->getOperand(0);
3819 SDValue RHS = N->getOperand(1);
3820
3821 if (!LHS.getValueType().isScalarInteger())
3822 return false;
3823
3824 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3825 if (isNullConstant(RHS)) {
3826 Val = LHS;
3827 return true;
3828 }
3829
3830 SDLoc DL(N);
3831
3832 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3833 int64_t CVal = C->getSExtValue();
3834 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3835 // non-zero otherwise.
// (-(-2048) = 2048 is not a valid addi immediate, hence the xori special case.)
3836 if (CVal == -2048) {
3837 Val = SDValue(
3838 CurDAG->getMachineNode(
3839 RISCV::XORI, DL, N->getValueType(0), LHS,
3840 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3841 0);
3842 return true;
3843 }
3844 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3845 // if the LHS is equal to the RHS and non-zero otherwise.
3846 if (isInt<12>(CVal) || CVal == 2048) {
3847 unsigned Opc = RISCV::ADDI;
// If LHS is already a sign_extend_inreg from i32, fold it into addiw.
3848 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3849 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3850 Opc = RISCV::ADDIW;
3851 LHS = LHS.getOperand(0);
3852 }
3853
3854 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3855 CurDAG->getSignedTargetConstant(
3856 -CVal, DL, N->getValueType(0))),
3857 0);
3858 return true;
3859 }
// A single-bit RHS can be cleared with a bit-invert (Zbs binvi).
3860 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3861 Val = SDValue(
3862 CurDAG->getMachineNode(
3863 RISCV::BINVI, DL, N->getValueType(0), LHS,
3864 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3865 0);
3866 return true;
3867 }
3868 // Same as the addi case above but for larger immediates (signed 26-bit) use
3869 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3870 // anything which can be done with a single lui as it might be compressible.
3871 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3872 (CVal & 0xFFF) != 0) {
3873 Val = SDValue(
3874 CurDAG->getMachineNode(
3875 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3876 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3877 0);
3878 return true;
3879 }
3880 }
3881
3882 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3883 // equal and a non-zero value if they aren't.
3884 Val = SDValue(
3885 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3886 return true;
3887}
3888
// NOTE(review): signature line missing in this listing; from the body this is
// selectSExtBits(SDValue N, unsigned Bits, SDValue &Val): match a value that
// is already sign-extended from Bits bits and return the unextended input.
3890 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3891 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3892 Val = N.getOperand(0);
3893 return true;
3894 }
3895
// (sra (shl X, C), C) is a sign-extension from (width - C) bits; peel both
// shifts when C equals the extension amount we are looking for.
3896 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3897 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3898 return N;
3899
3900 SDValue N0 = N.getOperand(0);
3901 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3902 N.getConstantOperandVal(1) == ShiftAmt &&
3903 N0.getConstantOperandVal(1) == ShiftAmt)
3904 return N0.getOperand(0);
3905
3906 return N;
3907 };
3908
3909 MVT VT = N.getSimpleValueType();
// More than (width - Bits) sign bits means N is sign-extended from Bits.
3910 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3911 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3912 return true;
3913 }
3914
3915 return false;
3916}
3917
// NOTE(review): signature line missing in this listing; from the body this is
// selectZExtBits(SDValue N, unsigned Bits, SDValue &Val): match a value known
// to be zero in all bits above Bits and return the unmasked input.
3919 if (N.getOpcode() == ISD::AND) {
3920 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
// An AND with the exact low-Bits mask is a zero-extension; drop it.
3921 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3922 Val = N.getOperand(0);
3923 return true;
3924 }
3925 }
3926 MVT VT = N.getSimpleValueType();
// Otherwise accept N as-is if the high bits are provably zero already.
3927 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3928 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3929 Val = N;
3930 return true;
3931 }
3932
3933 return false;
3934}
3935
3936/// Look for various patterns that can be done with a SHL that can be folded
3937/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3938/// SHXADD we are trying to match.
// NOTE(review): the signature line is missing in this extracted listing; from
// upstream context this is selectSHXADDOp(SDValue N, unsigned ShAmt,
// SDValue &Val) — confirm. Several condition lines below were also dropped by
// the extraction and are flagged inline.
3940 SDValue &Val) {
3941 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3942 SDValue N0 = N.getOperand(0);
3943
// NOTE(review): a condition line is missing after the next two lines —
// presumably isa<ConstantSDNode>(N0.getOperand(1))) { — confirm upstream.
3944 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3945 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3947 uint64_t Mask = N.getConstantOperandVal(1);
3948 unsigned C2 = N0.getConstantOperandVal(1);
3949
// Clear the mask bits the inner shift already forces to zero, so the
// shifted-mask analysis below sees the effective mask.
3950 unsigned XLen = Subtarget->getXLen();
3951 if (LeftShift)
3952 Mask &= maskTrailingZeros<uint64_t>(C2);
3953 else
3954 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3955
3956 if (isShiftedMask_64(Mask)) {
3957 unsigned Leading = XLen - llvm::bit_width(Mask);
3958 unsigned Trailing = llvm::countr_zero(Mask);
// The SHXADD itself provides the final ShAmt left shift, so the mask's
// trailing zero count must match exactly.
3959 if (Trailing != ShAmt)
3960 return false;
3961
3962 unsigned Opcode;
3963 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3964 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3965 // followed by a SHXADD with c3 for the X amount.
3966 if (LeftShift && Leading == 0 && C2 < Trailing)
3967 Opcode = RISCV::SRLI;
3968 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3969 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3970 // followed by a SHXADD with c3 for the X amount.
3971 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3972 Opcode = RISCV::SRLIW;
3973 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3974 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3975 // followed by a SHXADD using c3 for the X amount.
3976 else if (!LeftShift && Leading == C2)
3977 Opcode = RISCV::SRLI;
3978 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3979 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3980 // followed by a SHXADD using c3 for the X amount.
3981 else if (!LeftShift && Leading == 32 + C2)
3982 Opcode = RISCV::SRLIW;
3983 else
3984 return false;
3985
3986 SDLoc DL(N);
3987 EVT VT = N.getValueType();
3988 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3989 Val = SDValue(
3990 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3991 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3992 0);
3993 return true;
3994 }
// NOTE(review): a condition line is missing after the next line — presumably
// isa<ConstantSDNode>(N0.getOperand(1))) { — confirm upstream.
3995 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3997 uint64_t Mask = N.getConstantOperandVal(1);
3998 unsigned C2 = N0.getConstantOperandVal(1);
3999
4000 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4001 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4002 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4003 // the X amount.
4004 if (isShiftedMask_64(Mask)) {
4005 unsigned XLen = Subtarget->getXLen();
4006 unsigned Leading = XLen - llvm::bit_width(Mask);
4007 unsigned Trailing = llvm::countr_zero(Mask);
4008 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4009 SDLoc DL(N);
4010 EVT VT = N.getValueType();
4011 Val = SDValue(CurDAG->getMachineNode(
4012 RISCV::SRAI, DL, VT, N0.getOperand(0),
4013 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
4014 0);
4015 Val = SDValue(CurDAG->getMachineNode(
4016 RISCV::SRLI, DL, VT, Val,
4017 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
4018 0);
4019 return true;
4020 }
4021 }
4022 }
4023 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4024 (LeftShift || N.getOpcode() == ISD::SRL) &&
4025 isa<ConstantSDNode>(N.getOperand(1))) {
4026 SDValue N0 = N.getOperand(0);
// NOTE(review): the continuation of this condition (presumably
// isa<ConstantSDNode>(N0.getOperand(1))) { ) is missing in this listing.
4028 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4029 uint64_t Mask = N0.getConstantOperandVal(1);
4030 if (isShiftedMask_64(Mask)) {
4031 unsigned C1 = N.getConstantOperandVal(1);
4032 unsigned XLen = Subtarget->getXLen();
4033 unsigned Leading = XLen - llvm::bit_width(Mask);
4034 unsigned Trailing = llvm::countr_zero(Mask);
4035 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4036 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4037 if (LeftShift && Leading == 32 && Trailing > 0 &&
4038 (Trailing + C1) == ShAmt) {
4039 SDLoc DL(N);
4040 EVT VT = N.getValueType();
4041 Val = SDValue(CurDAG->getMachineNode(
4042 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4043 CurDAG->getTargetConstant(Trailing, DL, VT)),
4044 0);
4045 return true;
4046 }
4047 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4048 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4049 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4050 (Trailing - C1) == ShAmt) {
4051 SDLoc DL(N);
4052 EVT VT = N.getValueType();
4053 Val = SDValue(CurDAG->getMachineNode(
4054 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4055 CurDAG->getTargetConstant(Trailing, DL, VT)),
4056 0);
4057 return true;
4058 }
4059 }
4060 }
4061 }
4062
4063 return false;
4064}
4065
4066/// Look for various patterns that can be done with a SHL that can be folded
4067/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4068/// SHXADD_UW we are trying to match.
// NOTE(review): the signature line is missing in this extracted listing; from
// upstream context this is selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
// SDValue &Val) — confirm.
4070 SDValue &Val) {
4071 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
4072 N.hasOneUse()) {
4073 SDValue N0 = N.getOperand(0);
4074 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4075 N0.hasOneUse()) {
4076 uint64_t Mask = N.getConstantOperandVal(1);
4077 unsigned C2 = N0.getConstantOperandVal(1);
4078
// Bits already zeroed by the inner shl cannot be part of the mask.
4079 Mask &= maskTrailingZeros<uint64_t>(C2);
4080
4081 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4082 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4083 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4084 if (isShiftedMask_64(Mask)) {
4085 unsigned Leading = llvm::countl_zero(Mask);
4086 unsigned Trailing = llvm::countr_zero(Mask);
4087 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4088 SDLoc DL(N);
4089 EVT VT = N.getValueType();
4090 Val = SDValue(CurDAG->getMachineNode(
4091 RISCV::SLLI, DL, VT, N0.getOperand(0),
4092 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
4093 0);
4094 return true;
4095 }
4096 }
4097 }
4098 }
4099
4100 return false;
4101}
4102
// NOTE(review): signature line missing in this listing; from the body this is
// orDisjoint(const SDNode *N): returns true if the OR's operands share no set
// bits, either via the disjoint flag or a KnownBits query.
4104 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4105 if (N->getFlags().hasDisjoint())
4106 return true;
4107 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
4108}
4109
4110// Materialize \p Imm in place of \p OrigImm when its materialization sequence
4111// is strictly cheaper (including compression heuristics). On success \p Val
4112// holds the newly materialized constant.
4113bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4114                                             SDValue N, SDValue &Val) {
4115  auto MatCost = [&](int64_t C) {
4116    return RISCVMatInt::getIntMatCost(APInt(64, C), 64, *Subtarget,
4117                                      /*CompressionCost=*/true);
4118  };
4119  // Ties keep the original constant; only a strictly cheaper replacement wins.
4120  if (MatCost(Imm) >= MatCost(OrigImm))
4121    return false;
4122
4123  Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4124  return true;
4125}
4122
// NOTE(review): signature line missing in this listing. The body selects a
// constant with bit 31 set and tries materializing it with all high bits set
// (0xffffffff00000000 | Imm) instead, which is only sound when every user is
// an ADD or a disjoint OR (the high bits cancel/are ignored) — confirm the
// exact name (selectZExtImm32?) upstream.
4124 if (!isa<ConstantSDNode>(N))
4125 return false;
4126 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
// Require bit 31 set: the rewrite flips only bits 63..32.
4127 if ((Imm >> 31) != 1)
4128 return false;
4129
// Every user must tolerate the changed upper bits.
4130 for (const SDNode *U : N->users()) {
4131 switch (U->getOpcode()) {
4132 case ISD::ADD:
4133 break;
4134 case ISD::OR:
4135 if (orDisjoint(U))
4136 break;
4137 return false;
4138 default:
4139 return false;
4140 }
4141 }
4142
4143 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
4144}
4145
// NOTE(review): signature line missing in this listing; from the callers below
// (selectVSplatImm64Neg uses it) this is selectNegImm(SDValue N, SDValue &Val):
// materialize -Imm instead of Imm when cheaper, valid because all users are
// adds (scalar, or vector via VMV_V_X_VL feeding adds) — confirm upstream.
4147 if (!isa<ConstantSDNode>(N))
4148 return false;
4149 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
// 32-bit constants are already cheap; INT64_MIN has no negation in 64 bits.
4150 if (isInt<32>(Imm))
4151 return false;
4152 if (Imm == INT64_MIN)
4153 return false;
4154
4155 for (const SDNode *U : N->users()) {
4156 switch (U->getOpcode()) {
4157 case ISD::ADD:
4158 break;
// A splat of the constant is fine as long as every splat user is an add.
4159 case RISCVISD::VMV_V_X_VL:
4160 if (!all_of(U->users(), [](const SDNode *V) {
4161 return V->getOpcode() == ISD::ADD ||
4162 V->getOpcode() == RISCVISD::ADD_VL;
4163 }))
4164 return false;
4165 break;
4166 default:
4167 return false;
4168 }
4169 }
4170
4171 return selectImm64IfCheaper(-Imm, Imm, N, Val);
4172}
4173
// NOTE(review): signature line missing in this listing. The body materializes
// ~Imm instead of Imm when cheaper, valid because every user is a logic op
// that has an inverted form (andn/orn/xnor via Zbb/Zbkb, vandn via Zvkb) —
// presumably selectInvLogicImm; confirm upstream.
4175 if (!isa<ConstantSDNode>(N))
4176 return false;
4177 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4178
4179 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
4180 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4181 return false;
4182
4183 // Abandon this transform if the constant is needed elsewhere.
4184 for (const SDNode *U : N->users()) {
4185 switch (U->getOpcode()) {
4186 case ISD::AND:
4187 case ISD::OR:
4188 case ISD::XOR:
// Inverted-operand scalar logic ops require Zbb or Zbkb.
4189 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4190 return false;
4191 break;
4192 case RISCVISD::VMV_V_X_VL:
// Vector side needs vandn (Zvkb), and every splat user must be an AND.
4193 if (!Subtarget->hasStdExtZvkb())
4194 return false;
4195 if (!all_of(U->users(), [](const SDNode *V) {
4196 return V->getOpcode() == ISD::AND ||
4197 V->getOpcode() == RISCVISD::AND_VL;
4198 }))
4199 return false;
4200 break;
4201 default:
4202 return false;
4203 }
4204 }
4205
4206 if (isInt<32>(Imm)) {
4207 Val =
4208 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
4209 return true;
4210 }
4211
4212 // For 64-bit constants, the instruction sequences get complex,
4213 // so we select inverted only if it's cheaper.
4214 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4215}
4216
4217// Return true if operand \p UserOpNo of the RVV pseudo \p User only demands
4218// the low \p Bits bits of its scalar input.
4219static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4220                                        unsigned Bits,
4221                                        const TargetInstrInfo *TII) {
4222  const unsigned PseudoOpc = User->getMachineOpcode();
4223  const unsigned MCOpcode = RISCV::getRVVMCOpcode(PseudoOpc);
4224  if (!MCOpcode)
4225    return false;
4226
4227  const uint64_t TSFlags = TII->get(PseudoOpc).TSFlags;
4228  if (!RISCVII::hasSEWOp(TSFlags))
4229    return false;
4230  assert(RISCVII::hasVLOp(TSFlags));
4231
4232  // The VL operand sits immediately before the SEW operand; an optional
4233  // policy operand and/or chain may trail them.
4234  const unsigned NumOps = User->getNumOperands();
4235  const bool HasChainOp =
4236      User->getOperand(NumOps - 1).getValueType() == MVT::Other;
4237  const bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4238  const unsigned VLIdx = NumOps - HasVecPolicyOp - HasChainOp - 2;
4239
4240  // The VL operand itself is not a narrow scalar use.
4241  if (UserOpNo == VLIdx)
4242    return false;
4243
4244  const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4245  const auto Demanded =
4246      RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4247  return Demanded && Bits >= *Demanded;
4248}
4244
4245// Return true if all users of this SDNode* only consume the lower \p Bits.
4246// This can be used to form W instructions for add/sub/mul/shl even when the
4247// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4248// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4249// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4250// the add/sub/mul/shl to become non-W instructions. By checking the users we
4251// may be able to use a W instruction and CSE with the other instruction if
4252// this has happened. We could try to detect that the CSE opportunity exists
4253// before doing this, but that would be more complicated.
// NOTE(review): the signature line is missing in this extracted listing;
// presumably hasAllNBitUsers(SDNode *Node, unsigned Bits, unsigned Depth).
4255 const unsigned Depth) const {
4256 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4257 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4258 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4259 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4260 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4261 isa<ConstantSDNode>(Node) || Depth != 0) &&
4262 "Unexpected opcode");
4263
// NOTE(review): the guard condition on the next line is missing in this
// listing — presumably a Depth >= SelectionDAG::MaxRecursionDepth check.
4265 return false;
4266
4267 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4268 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4269 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4270 return false;
4271
4272 for (SDUse &Use : Node->uses()) {
4273 SDNode *User = Use.getUser();
4274 // Users of this node should have already been instruction selected
4275 if (!User->isMachineOpcode())
4276 return false;
4277
4278 // TODO: Add more opcodes?
4279 switch (User->getMachineOpcode()) {
4280 default:
// NOTE(review): the condition line above this break is missing in this
// listing — presumably an if (vectorPseudoHasAllNBitUsers(...)) check.
4282 break;
4283 return false;
// All of the following W-form / 32-bit-input instructions only read the low
// 32 bits of their GPR inputs.
4284 case RISCV::ADDW:
4285 case RISCV::ADDIW:
4286 case RISCV::SUBW:
4287 case RISCV::MULW:
4288 case RISCV::SLLW:
4289 case RISCV::SLLIW:
4290 case RISCV::SRAW:
4291 case RISCV::SRAIW:
4292 case RISCV::SRLW:
4293 case RISCV::SRLIW:
4294 case RISCV::DIVW:
4295 case RISCV::DIVUW:
4296 case RISCV::REMW:
4297 case RISCV::REMUW:
4298 case RISCV::ROLW:
4299 case RISCV::RORW:
4300 case RISCV::RORIW:
4301 case RISCV::CLSW:
4302 case RISCV::CLZW:
4303 case RISCV::CTZW:
4304 case RISCV::CPOPW:
4305 case RISCV::SLLI_UW:
4306 case RISCV::ABSW:
4307 case RISCV::FMV_W_X:
4308 case RISCV::FCVT_H_W:
4309 case RISCV::FCVT_H_W_INX:
4310 case RISCV::FCVT_H_WU:
4311 case RISCV::FCVT_H_WU_INX:
4312 case RISCV::FCVT_S_W:
4313 case RISCV::FCVT_S_W_INX:
4314 case RISCV::FCVT_S_WU:
4315 case RISCV::FCVT_S_WU_INX:
4316 case RISCV::FCVT_D_W:
4317 case RISCV::FCVT_D_W_INX:
4318 case RISCV::FCVT_D_WU:
4319 case RISCV::FCVT_D_WU_INX:
4320 case RISCV::TH_REVW:
4321 case RISCV::TH_SRRIW:
4322 if (Bits >= 32)
4323 break;
4324 return false;
4325 case RISCV::SLL:
4326 case RISCV::SRA:
4327 case RISCV::SRL:
4328 case RISCV::ROL:
4329 case RISCV::ROR:
4330 case RISCV::BSET:
4331 case RISCV::BCLR:
4332 case RISCV::BINV:
4333 // Shift amount operands only use log2(Xlen) bits.
4334 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4335 break;
4336 return false;
4337 case RISCV::SLLI:
4338 // SLLI only uses the lower (XLen - ShAmt) bits.
4339 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4340 break;
4341 return false;
4342 case RISCV::ANDI:
// ANDI with a mask inside the known-good bits is safe outright; otherwise
// fall back to recursively checking the ANDI's own users.
4343 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4344 break;
4345 goto RecCheck;
4346 case RISCV::ORI: {
// ORI forces the bits of ~Imm; if those all sit in the low Bits, safe.
4347 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4348 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4349 break;
4350 [[fallthrough]];
4351 }
// Bitwise ops and shNadd propagate demanded bits: recurse into their users.
4352 case RISCV::AND:
4353 case RISCV::OR:
4354 case RISCV::XOR:
4355 case RISCV::XORI:
4356 case RISCV::ANDN:
4357 case RISCV::ORN:
4358 case RISCV::XNOR:
4359 case RISCV::SH1ADD:
4360 case RISCV::SH2ADD:
4361 case RISCV::SH3ADD:
4362 RecCheck:
4363 if (hasAllNBitUsers(User, Bits, Depth + 1))
4364 break;
4365 return false;
4366 case RISCV::SRLI: {
4367 unsigned ShAmt = User->getConstantOperandVal(1);
4368 // If we are shifting right by less than Bits, and users don't demand any
4369 // bits that were shifted into [Bits-1:0], then we can consider this as an
4370 // N-Bit user.
4371 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4372 break;
4373 return false;
4374 }
4375 case RISCV::SEXT_B:
4376 case RISCV::PACKH:
4377 if (Bits >= 8)
4378 break;
4379 return false;
4380 case RISCV::SEXT_H:
4381 case RISCV::FMV_H_X:
4382 case RISCV::ZEXT_H_RV32:
4383 case RISCV::ZEXT_H_RV64:
4384 case RISCV::PACKW:
4385 if (Bits >= 16)
4386 break;
4387 return false;
4388 case RISCV::PACK:
4389 if (Bits >= (Subtarget->getXLen() / 2))
4390 break;
4391 return false;
4392 case RISCV::PPAIRE_H:
4393 // If only the lower 32-bits of the result are used, then only the
4394 // lower 16 bits of the inputs are used.
4395 if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
4396 break;
4397 return false;
4398 case RISCV::ADD_UW:
4399 case RISCV::SH1ADD_UW:
4400 case RISCV::SH2ADD_UW:
4401 case RISCV::SH3ADD_UW:
4402 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4403 // 32 bits.
4404 if (Use.getOperandNo() == 0 && Bits >= 32)
4405 break;
4406 return false;
// Stores only read as many bits as they write.
4407 case RISCV::SB:
4408 if (Use.getOperandNo() == 0 && Bits >= 8)
4409 break;
4410 return false;
4411 case RISCV::SH:
4412 if (Use.getOperandNo() == 0 && Bits >= 16)
4413 break;
4414 return false;
4415 case RISCV::SW:
4416 if (Use.getOperandNo() == 0 && Bits >= 32)
4417 break;
4418 return false;
4419 case RISCV::TH_EXT:
4420 case RISCV::TH_EXTU: {
4421 unsigned Msb = User->getConstantOperandVal(1);
4422 unsigned Lsb = User->getConstantOperandVal(2);
4423 // Behavior of Msb < Lsb is not well documented.
4424 if (Msb >= Lsb && Bits > Msb)
4425 break;
4426 return false;
4427 }
4428 }
4429 }
4430
4431 return true;
4432}
4433
4434// Select a constant that can be represented as (sign_extend(imm5) << imm2).
// NOTE(review): signature line missing in this listing; presumably
// selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2) — confirm.
4436 SDValue &Shl2) {
4437 auto *C = dyn_cast<ConstantSDNode>(N);
4438 if (!C)
4439 return false;
4440
// Try shift amounts 0..3 and take the first that losslessly decomposes the
// constant into a signed 5-bit value shifted left by Shift.
4441 int64_t Offset = C->getSExtValue();
4442 for (unsigned Shift = 0; Shift < 4; Shift++) {
4443 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4444 EVT VT = N->getValueType(0);
4445 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4446 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4447 return true;
4448 }
4449 }
4450
4451 return false;
4452}
4453
4454// Select VL as a 5 bit immediate or a value that will become a register. This
4455// allows us to choose between VSETIVLI or VSETVLI later.
// NOTE(review): signature line missing in this listing; presumably
// selectVLOp(SDValue N, SDValue &VL) — confirm.
4457 auto *C = dyn_cast<ConstantSDNode>(N);
4458 if (C && isUInt<5>(C->getZExtValue())) {
4459 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4460 N->getValueType(0));
4461 } else if (C && C->isAllOnes()) {
4462 // Treat all ones as VLMax.
4463 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4464 N->getValueType(0));
4465 } else if (isa<RegisterSDNode>(N) &&
4466 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4467 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4468 // as the register class. Convert X0 to a special immediate to pass the
4469 // MachineVerifier. This is recognized specially by the vsetvli insertion
4470 // pass.
4471 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4472 N->getValueType(0));
4473 } else {
4474 VL = N;
4475 }
4476
// Always matches: VL is either an immediate form or the original value.
4477 return true;
4478}
4479
// NOTE(review): signature line missing in this listing; from the callers this
// is a static helper (findVSplat?) returning the VMV_V_X_VL/VMV_S_X_VL splat
// underlying N, or an empty SDValue — confirm upstream.
4481 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Look through an insert into an undef vector.
4482 if (!N.getOperand(0).isUndef())
4483 return SDValue();
4484 N = N.getOperand(1);
4485 }
4486 SDValue Splat = N;
// Only passthru-less (undef operand 0) splats qualify.
4487 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4488 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4489 !Splat.getOperand(0).isUndef())
4490 return SDValue();
4491 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4492 return Splat;
4494
// NOTE(review): the signature and the line initializing Splat (presumably
// SDValue Splat = findVSplat(N);) are missing in this extracted listing.
// Matches any scalar-splat and returns its scalar operand in SplatVal.
4497 if (!Splat)
4498 return false;
4499
4500 SplatVal = Splat.getOperand(1);
4501 return true;
4502}
4503
// NOTE(review): the first signature line and the line initializing Splat
// (presumably SDValue Splat = findVSplat(N);) are missing in this extracted
// listing. Shared helper for the selectVSplat* immediate predicates below:
// matches a constant splat whose (element-truncated) value passes ValidateImm,
// optionally decrementing it before emitting the target constant.
4505 SelectionDAG &DAG,
4506 const RISCVSubtarget &Subtarget,
4507 std::function<bool(int64_t)> ValidateImm,
4508 bool Decrement = false) {
4510 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4511 return false;
4512
4513 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4514 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4515 "Unexpected splat operand type");
4516
4517 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4518 // type is wider than the resulting vector element type: an implicit
4519 // truncation first takes place. Therefore, perform a manual
4520 // truncation/sign-extension in order to ignore any truncated bits and catch
4521 // any zero-extended immediate.
4522 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4523 // sign-extending to (XLenVT -1).
4524 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4525
4526 int64_t SplatImm = SplatConst.getSExtValue();
4527
4528 if (!ValidateImm(SplatImm))
4529 return false;
4530
// Decrement AFTER validation: used by the "imm+1 fits simm5" matchers.
4531 if (Decrement)
4532 SplatImm -= 1;
4533
4534 SplatVal =
4535 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4536 return true;
4537}
4538
// NOTE(review): signature line missing; matches a splat of a signed 5-bit
// immediate (presumably selectVSplatSimm5 — confirm upstream).
4540 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4541 [](int64_t Imm) { return isInt<5>(Imm); });
4542}
4543
// NOTE(review): signature line missing; matches a splat in [-15,16] and
// returns the value minus one (presumably selectVSplatSimm5Plus1 — confirm).
4545 return selectVSplatImmHelper(
4546 N, SplatVal, *CurDAG, *Subtarget,
4547 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4548 /*Decrement=*/true);
4549}
4550
// NOTE(review): signature line missing; same range as the Plus1 variant above
// but without the decrement (name not visible — confirm upstream).
4552 return selectVSplatImmHelper(
4553 N, SplatVal, *CurDAG, *Subtarget,
4554 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4555 /*Decrement=*/false);
4556}
4557
// NOTE(review): first signature line missing; the Plus1 variant restricted to
// non-zero immediates (name not visible — confirm upstream).
4559 SDValue &SplatVal) {
4560 return selectVSplatImmHelper(
4561 N, SplatVal, *CurDAG, *Subtarget,
4562 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4563 /*Decrement=*/true);
4564}
4565
// NOTE(review): first signature line missing; matches a splat of an unsigned
// Bits-bit immediate (presumably selectVSplatUimm(SDValue, unsigned Bits, ...)).
4567 SDValue &SplatVal) {
4568 return selectVSplatImmHelper(
4569 N, SplatVal, *CurDAG, *Subtarget,
4570 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4571}
4572
// NOTE(review): the signature and the line initializing Splat (presumably
// SDValue Splat = findVSplat(N);) are missing in this extracted listing.
// Matches a splat whose scalar can be selected as a cheaper negated constant.
4575 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4576}
4577
// NOTE(review): signature line missing in this listing; matches a splat seen
// through any chain of single-use extend/truncate nodes (name not visible —
// confirm upstream).
4579 auto IsExtOrTrunc = [](SDValue N) {
4580 switch (N->getOpcode()) {
4581 case ISD::SIGN_EXTEND:
4582 case ISD::ZERO_EXTEND:
4583 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4584 // inactive elements will be undef.
4585 case RISCVISD::TRUNCATE_VECTOR_VL:
4586 case RISCVISD::VSEXT_VL:
4587 case RISCVISD::VZEXT_VL:
4588 return true;
4589 default:
4590 return false;
4591 }
4592 };
4593
4594 // We can have multiple nested nodes, so unravel them all if needed.
4595 while (IsExtOrTrunc(N)) {
// Bail out on shared nodes or sub-byte elements.
4596 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4597 return false;
4598 N = N->getOperand(0);
4599 }
4600
4601 return selectVSplat(N, SplatVal);
4602}
4603
// NOTE(review): signature line missing in this listing; presumably
// selectFPImm(SDValue N, SDValue &Imm): select an FP value that can be
// provided as (or materialized into) a scalar integer register.
4605 // Allow bitcasts from XLenVT -> FP.
4606 if (N.getOpcode() == ISD::BITCAST &&
4607 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4608 Imm = N.getOperand(0);
4609 return true;
4610 }
4611 // Allow moves from XLenVT to FP.
4612 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4613 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4614 Imm = N.getOperand(0);
4615 return true;
4616 }
4617
4618 // Otherwise, look for FP constants that can materialized with scalar int.
// NOTE(review): the line defining CFP (presumably
// auto *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());) is missing here.
4620 if (!CFP)
4621 return false;
4622 const APFloat &APF = CFP->getValueAPF();
4623 // td can handle +0.0 already.
4624 if (APF.isPosZero())
4625 return false;
4626
4627 MVT VT = CFP->getSimpleValueType(0);
4628
// f64 on RV32 doesn't fit in one GPR; only -0.0 is expected to reach here.
4629 MVT XLenVT = Subtarget->getXLenVT();
4630 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4631 assert(APF.isNegZero() && "Unexpected constant.");
4632 return false;
4633 }
4634 SDLoc DL(N);
4635 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4636 *Subtarget);
4637 return true;
4638}
4639
// NOTE(review): first signature line missing; matches a constant that, after
// sign-extending from Width bits, fits in a signed 5-bit immediate
// (presumably selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)).
4641 SDValue &Imm) {
4642 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4643 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4644
4645 if (!isInt<5>(ImmVal))
4646 return false;
4647
4648 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4649 Subtarget->getXLenVT());
4650 return true;
4651 }
4652
4653 return false;
4654}
4655
4656// Match XOR with a VMSET_VL operand. Return the other operand.
// NOTE(review): signature line missing in this listing; name not visible —
// confirm upstream. (xor X, allones) is a mask-NOT; this peels it.
4658 if (N.getOpcode() != ISD::XOR)
4659 return false;
4660
4661 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4662 Res = N.getOperand(1);
4663 return true;
4664 }
4665
4666 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4667 Res = N.getOperand(0);
4668 return true;
4669 }
4670
4671 return false;
4672}
4673
4674// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4675// matches the parent's VL. Return the other operand of the VMXOR_VL.
// NOTE(review): the first signature line is missing in this listing; name not
// visible — confirm upstream.
4677 SDValue &Res) {
4678 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4679 return false;
4680
4681 assert(Parent &&
4682 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4683 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4684 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4685 "Unexpected parent");
4686
4687 // The VL should match the parent.
4688 if (Parent->getOperand(2) != N->getOperand(2))
4689 return false;
4690
4691 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4692 Res = N.getOperand(1);
4693 return true;
4694 }
4695
4696 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4697 Res = N.getOperand(0);
4698 return true;
4699 }
4700
4701 return false;
4702}
4703
4704// Try to remove sext.w if the input is a W instruction or can be made into
4705// a W instruction cheaply.
4706bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4707 // Look for the sext.w pattern, addiw rd, rs1, 0.
4708 if (N->getMachineOpcode() != RISCV::ADDIW ||
4709 !isNullConstant(N->getOperand(1)))
4710 return false;
4711
4712 SDValue N0 = N->getOperand(0);
4713 if (!N0.isMachineOpcode())
4714 return false;
4715
4716 switch (N0.getMachineOpcode()) {
4717 default:
4718 break;
4719 case RISCV::ADD:
4720 case RISCV::ADDI:
4721 case RISCV::SUB:
4722 case RISCV::MUL:
4723 case RISCV::SLLI: {
4724 // Convert sext.w+add/sub/mul to their W instructions. This will create
4725 // a new independent instruction. This improves latency.
4726 unsigned Opc;
4727 switch (N0.getMachineOpcode()) {
4728 default:
4729 llvm_unreachable("Unexpected opcode!");
4730 case RISCV::ADD: Opc = RISCV::ADDW; break;
4731 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4732 case RISCV::SUB: Opc = RISCV::SUBW; break;
4733 case RISCV::MUL: Opc = RISCV::MULW; break;
4734 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4735 }
4736
4737 SDValue N00 = N0.getOperand(0);
4738 SDValue N01 = N0.getOperand(1);
4739
4740 // Shift amount needs to be uimm5.
// (A 64-bit SLLI shift amount of >=32 has no SLLIW equivalent.)
4741 if (N0.getMachineOpcode() == RISCV::SLLI &&
4742 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4743 break;
4744
4745 SDNode *Result =
4746 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4747 N00, N01);
4748 ReplaceUses(N, Result);
4749 return true;
4750 }
// These already produce sign-extended 32-bit results: drop the sext.w.
4751 case RISCV::ADDW:
4752 case RISCV::ADDIW:
4753 case RISCV::SUBW:
4754 case RISCV::MULW:
4755 case RISCV::SLLIW:
4756 case RISCV::PACKW:
4757 case RISCV::TH_MULAW:
4758 case RISCV::TH_MULAH:
4759 case RISCV::TH_MULSW:
4760 case RISCV::TH_MULSH:
4761 if (N0.getValueType() == MVT::i32)
4762 break;
4763
4764 // Result is already sign extended just remove the sext.w.
4765 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4766 ReplaceUses(N, N0.getNode());
4767 return true;
4768 }
4769
4770 return false;
4771}
4772
4773// Return true if \p MaskOp is a vmset.m pseudo of any bitwidth, i.e. an
4774// all-ones mask.
4775static bool usesAllOnesMask(SDValue MaskOp) {
4776  if (!MaskOp->isMachineOpcode())
4777    return false;
4778
4779  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4780  // undefined behaviour if it's the wrong bitwidth, so we could choose to
4781  // assume that it's all-ones? Same applies to its VL.
4782  switch (MaskOp.getMachineOpcode()) {
4783  case RISCV::PseudoVMSET_M_B1:
4784  case RISCV::PseudoVMSET_M_B2:
4785  case RISCV::PseudoVMSET_M_B4:
4786  case RISCV::PseudoVMSET_M_B8:
4787  case RISCV::PseudoVMSET_M_B16:
4788  case RISCV::PseudoVMSET_M_B32:
4789  case RISCV::PseudoVMSET_M_B64:
4790    return true;
4791  default:
4792    return false;
4793  }
4794}
4786
4787static bool isImplicitDef(SDValue V) {
4788 if (!V.isMachineOpcode())
4789 return false;
4790 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4791 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4792 if (!isImplicitDef(V.getOperand(I)))
4793 return false;
4794 return true;
4795 }
4796 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4797}
4798
4799// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4800// corresponding "unmasked" pseudo versions.
4801bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4802 const RISCV::RISCVMaskedPseudoInfo *I =
4803 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4804 if (!I)
4805 return false;
4806
4807 unsigned MaskOpIdx = I->MaskOpIdx;
4808 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4809 return false;
4810
4811 // There are two classes of pseudos in the table - compares and
4812 // everything else. See the comment on RISCVMaskedPseudo for details.
4813 const unsigned Opc = I->UnmaskedPseudo;
4814 const MCInstrDesc &MCID = TII->get(Opc);
4815 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4816
4817 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4818 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4819
4820 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4822 "Unmasked pseudo has policy but masked pseudo doesn't?");
4823 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4824 "Unexpected pseudo structure");
4825 assert(!(HasPassthru && !MaskedHasPassthru) &&
4826 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4827
4829 // Skip the passthru operand at index 0 if the unmasked don't have one.
4830 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4831 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4832 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4833 bool HasChainOp =
4834 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4835 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4836 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4837 // Skip the mask
4838 SDValue Op = N->getOperand(I);
4839 if (I == MaskOpIdx)
4840 continue;
4841 if (DropPolicy && I == LastOpNum)
4842 continue;
4843 Ops.push_back(Op);
4844 }
4845
4846 MachineSDNode *Result =
4847 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4848
4849 if (!N->memoperands_empty())
4850 CurDAG->setNodeMemRefs(Result, N->memoperands());
4851
4852 Result->setFlags(N->getFlags());
4853 ReplaceUses(N, Result);
4854
4855 return true;
4856}
4857
4858/// If our passthru is an implicit_def, use noreg instead. This side
4859/// steps issues with MachineCSE not being able to CSE expressions with
4860/// IMPLICIT_DEF operands while preserving the semantic intent. See
4861/// pr64282 for context. Note that this transform is the last one
4862/// performed at ISEL DAG to DAG.
4863bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4864 bool MadeChange = false;
4865 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4866
4867 while (Position != CurDAG->allnodes_begin()) {
4868 SDNode *N = &*--Position;
4869 if (N->use_empty() || !N->isMachineOpcode())
4870 continue;
4871
4872 const unsigned Opc = N->getMachineOpcode();
4873 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4875 !isImplicitDef(N->getOperand(0)))
4876 continue;
4877
4879 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4880 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4881 SDValue Op = N->getOperand(I);
4882 Ops.push_back(Op);
4883 }
4884
4885 MachineSDNode *Result =
4886 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4887 Result->setFlags(N->getFlags());
4888 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4889 ReplaceUses(N, Result);
4890 MadeChange = true;
4891 }
4892 return MadeChange;
4893}
4894
4895
4896// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4897// for instruction scheduling.
4899 CodeGenOptLevel OptLevel) {
4900 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4901}
4902
4904
4909
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
static std::pair< SDValue, SDValue > extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool isApplicableToPLI(int Val)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
bool isPosZero() const
Definition APFloat.h:1527
bool isNegZero() const
Definition APFloat.h:1528
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
LLVM_ABI bool isSplat(unsigned SplatSizeInBits) const
Check if the APInt consists of a repeated bit pattern.
Definition APInt.cpp:630
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:473
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:427
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.