LLVM 23.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34
36 "riscv-use-rematerializable-movimm", cl::Hidden,
37 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
38 "constant materialization"),
39 cl::init(false));
40
41#define GET_DAGISEL_BODY RISCVDAGToDAGISel
42#include "RISCVGenDAGISel.inc"
43
45 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
46
47 bool MadeChange = false;
48 while (Position != CurDAG->allnodes_begin()) {
49 SDNode *N = &*--Position;
50 if (N->use_empty())
51 continue;
52
53 SDValue Result;
54 switch (N->getOpcode()) {
55 case ISD::SPLAT_VECTOR: {
56 if (Subtarget->hasStdExtP())
57 break;
58 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
59 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
60 MVT VT = N->getSimpleValueType(0);
61 unsigned Opc =
62 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
63 SDLoc DL(N);
64 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
65 SDValue Src = N->getOperand(0);
66 if (VT.isInteger())
67 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
68 N->getOperand(0));
69 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
70 break;
71 }
72 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
73 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
74 // load. Done after lowering and combining so that we have a chance to
75 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
76 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
77 MVT VT = N->getSimpleValueType(0);
78 SDValue Passthru = N->getOperand(0);
79 SDValue Lo = N->getOperand(1);
80 SDValue Hi = N->getOperand(2);
81 SDValue VL = N->getOperand(3);
82 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
83 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
84 "Unexpected VTs!");
85 MachineFunction &MF = CurDAG->getMachineFunction();
86 SDLoc DL(N);
87
88 // Create temporary stack for each expanding node.
89 SDValue StackSlot =
90 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
91 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
93
94 SDValue Chain = CurDAG->getEntryNode();
95 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
96
97 SDValue OffsetSlot =
98 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
99 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
100 Align(8));
101
102 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
103
104 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
105 SDValue IntID =
106 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
107 SDValue Ops[] = {Chain,
108 IntID,
109 Passthru,
110 StackSlot,
111 CurDAG->getRegister(RISCV::X0, MVT::i64),
112 VL};
113
114 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
115 MVT::i64, MPI, Align(8),
117 break;
118 }
119 case ISD::FP_EXTEND: {
120 // We only have vector patterns for riscv_fpextend_vl in isel.
121 SDLoc DL(N);
122 MVT VT = N->getSimpleValueType(0);
123 if (!VT.isVector())
124 break;
125 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
126 SDValue TrueMask = CurDAG->getNode(
127 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
128 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
129 TrueMask, VLMAX);
130 break;
131 }
132 }
133
134 if (Result) {
135 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
136 LLVM_DEBUG(N->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\nNew: ");
138 LLVM_DEBUG(Result->dump(CurDAG));
139 LLVM_DEBUG(dbgs() << "\n");
140
141 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
142 MadeChange = true;
143 }
144 }
145
146 if (MadeChange)
147 CurDAG->RemoveDeadNodes();
148}
149
151 HandleSDNode Dummy(CurDAG->getRoot());
152 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
153
154 bool MadeChange = false;
155 while (Position != CurDAG->allnodes_begin()) {
156 SDNode *N = &*--Position;
157 // Skip dead nodes and any non-machine opcodes.
158 if (N->use_empty() || !N->isMachineOpcode())
159 continue;
160
161 MadeChange |= doPeepholeSExtW(N);
162
163 // FIXME: This is here only because the VMerge transform doesn't
164 // know how to handle masked true inputs. Once that has been moved
165 // to post-ISEL, this can be deleted as well.
166 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
167 }
168
169 CurDAG->setRoot(Dummy.getValue());
170
171 // After we're done with everything else, convert IMPLICIT_DEF
172 // passthru operands to NoRegister. This is required to workaround
173 // an optimization deficiency in MachineCSE. This really should
174 // be merged back into each of the patterns (i.e. there's no good
175 // reason not to go directly to NoReg), but is being done this way
176 // to allow easy backporting.
177 MadeChange |= doPeepholeNoRegPassThru();
178
179 if (MadeChange)
180 CurDAG->RemoveDeadNodes();
181}
182
/// Emits the machine nodes for the constant-materialization sequence `Seq`.
///
/// The entries of `Seq` are visited in order and each one becomes a machine
/// node with that entry's opcode and result type \p VT. Depending on the
/// entry's operand kind the node is given one of four operand lists: the
/// immediate alone; the running value and X0; the running value twice; or the
/// running value and the immediate. The running value starts out as the X0
/// register and is replaced by result 0 of each node as it is created.
///
/// \returns result 0 of the last node created, or the X0 register when `Seq`
/// has no entries.
183static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
185 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
186 for (const RISCVMatInt::Inst &Inst : Seq) {
  // The immediate is built for every entry, even for the operand kinds that
  // do not use it.
187 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
188 SDNode *Result = nullptr;
189 switch (Inst.getOpndKind()) {
190 case RISCVMatInt::Imm:
  // Operands: (immediate).
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
192 break;
  // Operands: (running value, X0).
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
195 CurDAG->getRegister(RISCV::X0, VT));
196 break;
  // Operands: (running value, running value).
198 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
199 break;
  // Operands: (running value, immediate).
201 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
202 break;
203 }
204
205 // Only the first instruction has X0 as its source.
206 SrcReg = SDValue(Result, 0);
207 }
208
209 return SrcReg;
210}
211
/// Materializes the constant \p Imm as machine nodes of type \p VT.
///
/// Three strategies are tried in order, based on the length of `Seq`
/// (presumably the instruction sequence computed for \p Imm):
///  1. If `Seq` has exactly two entries and UsePseudoMovImm is set, a single
///     PseudoMovImm node carrying \p Imm is emitted.
///  2. If `Seq` has more than three entries, the sequence produced by
///     RISCVMatInt::generateTwoRegInstSeq is used when it is non-empty and,
///     together with the two extra nodes it needs (an SLLI and the combining
///     node), is still shorter than `Seq`.
///  3. Otherwise `Seq` is emitted as is through selectImmSeq.
///
/// \returns the value holding the materialized constant.
212static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
213 int64_t Imm, const RISCVSubtarget &Subtarget) {
215
216 // Use a rematerializable pseudo instruction for short sequences if enabled.
217 if (Seq.size() == 2 && UsePseudoMovImm)
218 return SDValue(
219 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
220 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
221 0);
222
223 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
224 // worst an LUI+ADDIW. This will require an extra register, but avoids a
225 // constant pool.
226 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
227 // low and high 32 bits are the same and bit 31 and 63 are set.
228 if (Seq.size() > 3) {
  // ShiftAmt and AddOpc are handed to generateTwoRegInstSeq uninitialized and
  // read below, so they are presumably filled in by that call.
229 unsigned ShiftAmt, AddOpc;
231 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
  // The "+ 2" accounts for the SLLI and the AddOpc node created below.
232 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
233 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
234
235 SDValue SLLI = SDValue(
236 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
237 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
238 0);
  // Combine the unshifted and the shifted copy of Lo.
239 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
240 }
241 }
242
243 // Otherwise, use the original sequence.
244 return selectImmSeq(CurDAG, DL, VT, Seq);
245}
246
248 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
249 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
250 bool IsLoad, MVT *IndexVT) {
251 SDValue Chain = Node->getOperand(0);
252
253 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
254
255 if (IsStridedOrIndexed) {
256 Operands.push_back(Node->getOperand(CurOp++)); // Index.
257 if (IndexVT)
258 *IndexVT = Operands.back()->getSimpleValueType(0);
259 }
260
261 if (IsMasked) {
262 SDValue Mask = Node->getOperand(CurOp++);
263 Operands.push_back(Mask);
264 }
265 SDValue VL;
266 selectVLOp(Node->getOperand(CurOp++), VL);
267 Operands.push_back(VL);
268
269 MVT XLenVT = Subtarget->getXLenVT();
270 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
271 Operands.push_back(SEWOp);
272
273 // At the IR layer, all the masked load intrinsics have policy operands,
274 // none of the others do. All have passthru operands. For our pseudos,
275 // all loads have policy operands.
276 if (IsLoad) {
278 if (IsMasked)
279 Policy = Node->getConstantOperandVal(CurOp++);
280 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
281 Operands.push_back(PolicyOp);
282 }
283
284 Operands.push_back(Chain); // Chain.
285}
286
/// Replaces \p Node with the matching VLSEG pseudo.
///
/// \param Node      The node being selected; its last operand is read as a
///                  constant Log2SEW and its memory operand is transferred to
///                  the new node.
/// \param NF        Forwarded to the pseudo lookup.
/// \param IsMasked  Whether \p Node carries a mask operand.
/// \param IsStrided Whether \p Node carries a stride operand.
///
/// The new node produces (MVT::Untyped, MVT::Other); results 0 and 1 of
/// \p Node are rewired to it and \p Node is then removed.
287void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
288 bool IsStrided) {
289 SDLoc DL(Node);
290 MVT VT = Node->getSimpleValueType(0);
291 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
293
  // Operand 2 is copied first (presumably the passthru value -- TODO confirm);
  // CurOp is then advanced past the remaining operands by the call below.
294 unsigned CurOp = 2;
296
297 Operands.push_back(Node->getOperand(CurOp++));
298
299 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
300 Operands, /*IsLoad=*/true);
301
  // Look up the pseudo for this NF / mask / stride / SEW / LMUL combination;
  // this is the non-fault-only-first variant.
302 const RISCV::VLSEGPseudo *P =
303 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
304 static_cast<unsigned>(LMUL));
305 MachineSDNode *Load =
306 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
307
  // Carry the memory operand of the original node over to the pseudo.
308 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
309
310 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
311 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
312 CurDAG->RemoveDeadNode(Node);
313}
314
316 bool IsMasked) {
317 SDLoc DL(Node);
318 MVT VT = Node->getSimpleValueType(0);
319 MVT XLenVT = Subtarget->getXLenVT();
320 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
322
323 unsigned CurOp = 2;
325
326 Operands.push_back(Node->getOperand(CurOp++));
327
328 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
329 /*IsStridedOrIndexed*/ false, Operands,
330 /*IsLoad=*/true);
331
332 const RISCV::VLSEGPseudo *P =
333 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
334 Log2SEW, static_cast<unsigned>(LMUL));
335 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
336 XLenVT, MVT::Other, Operands);
337
338 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
339
340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(Node);
344}
345
/// Replaces \p Node with the matching VLXSEG pseudo.
///
/// \param Node      The node being selected; its last operand is read as a
///                  constant Log2SEW and its memory operand is transferred to
///                  the new node.
/// \param NF        Forwarded to the pseudo lookup.
/// \param IsMasked  Whether \p Node carries a mask operand.
/// \param IsOrdered Forwarded to the pseudo lookup.
///
/// Reports a fatal usage error when the index element width is 64 bits and
/// the subtarget is not 64-bit. The new node produces
/// (MVT::Untyped, MVT::Other); results 0 and 1 of \p Node are rewired to it
/// and \p Node is then removed.
346void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(0);
350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
352
  // Operand 2 is copied first (presumably the passthru value -- TODO confirm);
  // CurOp is then advanced past the remaining operands by the call below.
353 unsigned CurOp = 2;
355
356 Operands.push_back(Node->getOperand(CurOp++));
357
  // IndexVT is filled in with the type of the index operand.
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, &IndexVT);
362
363#ifndef NDEBUG
364 // Number of elements = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  // The second member of the decoded pair selects division over
  // multiplication by the first member.
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373#endif
374
376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // IndexLog2EEW == 6 means 64-bit index elements.
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 reportFatalUsageError("The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
380 }
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
383 static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
386
  // Carry the memory operand of the original node over to the pseudo.
387 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
388
389 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
390 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
391 CurDAG->RemoveDeadNode(Node);
392}
393
/// Replaces \p Node with the matching VSSEG pseudo.
///
/// \param Node      The node being selected; its last operand is read as a
///                  constant Log2SEW and its memory operand is transferred to
///                  the new node.
/// \param NF        Forwarded to the pseudo lookup.
/// \param IsMasked  Whether \p Node carries a mask operand.
/// \param IsStrided Whether \p Node carries a stride operand.
///
/// The new node has the same first result type as \p Node and takes its place
/// via ReplaceNode.
394void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
395 bool IsStrided) {
396 SDLoc DL(Node);
  // Unlike the load variants, the vector type comes from operand 2 rather
  // than from a result of Node (presumably the value being stored -- TODO
  // confirm).
397 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
398 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
400
  // Operand 2 is copied first; CurOp is then advanced past the remaining
  // operands by the call below.
401 unsigned CurOp = 2;
403
404 Operands.push_back(Node->getOperand(CurOp++));
405
406 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
407 Operands);
408
409 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
410 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
411 MachineSDNode *Store =
412 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
413
  // Carry the memory operand of the original node over to the pseudo.
414 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
415
416 ReplaceNode(Node, Store);
417}
418
/// Replaces \p Node with the matching VSXSEG pseudo.
///
/// \param Node      The node being selected; its last operand is read as a
///                  constant Log2SEW and its memory operand is transferred to
///                  the new node.
/// \param NF        Forwarded to the pseudo lookup.
/// \param IsMasked  Whether \p Node carries a mask operand.
/// \param IsOrdered Forwarded to the pseudo lookup.
///
/// Reports a fatal usage error when the index element width is 64 bits and
/// the subtarget is not 64-bit. The new node has the same first result type
/// as \p Node and takes its place via ReplaceNode.
419void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
420 bool IsOrdered) {
421 SDLoc DL(Node);
  // The vector type comes from operand 2 rather than from a result of Node
  // (presumably the value being stored -- TODO confirm).
422 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
423 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
425
  // Operand 2 is copied first; CurOp is then advanced past the remaining
  // operands by the call below.
426 unsigned CurOp = 2;
428
429 Operands.push_back(Node->getOperand(CurOp++));
430
  // IndexVT is filled in with the type of the index operand.
431 MVT IndexVT;
432 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
433 /*IsStridedOrIndexed*/ true, Operands,
434 /*IsLoad=*/false, &IndexVT);
435
436#ifndef NDEBUG
437 // Number of elements = RVVBitsPerBlock * LMUL / SEW
438 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
439 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  // The second member of the decoded pair selects division over
  // multiplication by the first member.
440 if (DecodedLMUL.second)
441 ContainedTyNumElts /= DecodedLMUL.first;
442 else
443 ContainedTyNumElts *= DecodedLMUL.first;
444 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
445 "Element count mismatch");
446#endif
447
449 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // IndexLog2EEW == 6 means 64-bit index elements.
450 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
451 reportFatalUsageError("The V extension does not support EEW=64 for index "
452 "values when XLEN=32");
453 }
454 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
455 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
456 static_cast<unsigned>(IndexLMUL));
457 MachineSDNode *Store =
458 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
459
  // Carry the memory operand of the original node over to the pseudo.
460 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
461
462 ReplaceNode(Node, Store);
463}
464
466 if (!Subtarget->hasVInstructions())
467 return;
468
469 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
470
471 SDLoc DL(Node);
472 MVT XLenVT = Subtarget->getXLenVT();
473
474 unsigned IntNo = Node->getConstantOperandVal(0);
475
476 assert((IntNo == Intrinsic::riscv_vsetvli ||
477 IntNo == Intrinsic::riscv_vsetvlimax) &&
478 "Unexpected vsetvli intrinsic");
479
480 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
481 unsigned Offset = (VLMax ? 1 : 2);
482
483 assert(Node->getNumOperands() == Offset + 2 &&
484 "Unexpected number of operands");
485
486 unsigned SEW =
487 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
488 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
489 Node->getConstantOperandVal(Offset + 1) & 0x7);
490
491 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
492 /*MaskAgnostic*/ true);
493 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
494
495 SDValue VLOperand;
496 unsigned Opcode = RISCV::PseudoVSETVLI;
497 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
498 if (auto VLEN = Subtarget->getRealVLen())
499 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
500 VLMax = true;
501 }
502 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
503 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
504 Opcode = RISCV::PseudoVSETVLIX0;
505 } else {
506 VLOperand = Node->getOperand(1);
507
508 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
509 uint64_t AVL = C->getZExtValue();
510 if (isUInt<5>(AVL)) {
511 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
512 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
513 XLenVT, VLImm, VTypeIOp));
514 return;
515 }
516 }
517 }
518
520 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
521}
522
524 if (!Subtarget->hasVendorXSfmmbase())
525 return;
526
527 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
528
529 SDLoc DL(Node);
530 MVT XLenVT = Subtarget->getXLenVT();
531
532 unsigned IntNo = Node->getConstantOperandVal(0);
533
534 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
535 IntNo == Intrinsic::riscv_sf_vsettm ||
536 IntNo == Intrinsic::riscv_sf_vsettk) &&
537 "Unexpected XSfmm vset intrinsic");
538
539 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
540 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
541 unsigned PseudoOpCode =
542 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
543 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
544 : RISCV::PseudoSF_VSETTK;
545
546 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
547 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
548 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
549
550 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
551 Node->getOperand(1), VTypeIOp));
552 } else {
553 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
554 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
556 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
557 Node->getOperand(1), Log2SEW, TWiden));
558 }
559}
560
562 MVT VT = Node->getSimpleValueType(0);
563 unsigned Opcode = Node->getOpcode();
564 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
565 "Unexpected opcode");
566 SDLoc DL(Node);
567
568 // For operations of the form (x << C1) op C2, check if we can use
569 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
570 SDValue N0 = Node->getOperand(0);
571 SDValue N1 = Node->getOperand(1);
572
574 if (!Cst)
575 return false;
576
577 int64_t Val = Cst->getSExtValue();
578
579 // Check if immediate can already use ANDI/ORI/XORI.
580 if (isInt<12>(Val))
581 return false;
582
583 SDValue Shift = N0;
584
585 // If Val is simm32 and we have a sext_inreg from i32, then the binop
586 // produces at least 33 sign bits. We can peek through the sext_inreg and use
587 // a SLLIW at the end.
588 bool SignExt = false;
589 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
590 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
591 SignExt = true;
592 Shift = N0.getOperand(0);
593 }
594
595 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
596 return false;
597
599 if (!ShlCst)
600 return false;
601
602 uint64_t ShAmt = ShlCst->getZExtValue();
603
604 // Make sure that we don't change the operation by removing bits.
605 // This only matters for OR and XOR, AND is unaffected.
606 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
607 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
608 return false;
609
610 int64_t ShiftedVal = Val >> ShAmt;
611 if (!isInt<12>(ShiftedVal))
612 return false;
613
614 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
615 if (SignExt && ShAmt >= 32)
616 return false;
617
618 // Ok, we can reorder to get a smaller immediate.
619 unsigned BinOpc;
620 switch (Opcode) {
621 default: llvm_unreachable("Unexpected opcode");
622 case ISD::AND: BinOpc = RISCV::ANDI; break;
623 case ISD::OR: BinOpc = RISCV::ORI; break;
624 case ISD::XOR: BinOpc = RISCV::XORI; break;
625 }
626
627 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
628
629 SDNode *BinOp = CurDAG->getMachineNode(
630 BinOpc, DL, VT, Shift.getOperand(0),
631 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
632 SDNode *SLLI =
633 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
634 CurDAG->getTargetConstant(ShAmt, DL, VT));
635 ReplaceNode(Node, SLLI);
636 return true;
637}
638
640 unsigned Opc;
641
642 if (Subtarget->hasVendorXTHeadBb())
643 Opc = RISCV::TH_EXT;
644 else if (Subtarget->hasVendorXAndesPerf())
645 Opc = RISCV::NDS_BFOS;
646 else if (Subtarget->hasVendorXqcibm())
647 Opc = RISCV::QC_EXT;
648 else
649 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
650 return false;
651
652 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
653 if (!N1C)
654 return false;
655
656 SDValue N0 = Node->getOperand(0);
657 if (!N0.hasOneUse())
658 return false;
659
660 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
661 const SDLoc &DL, MVT VT) {
662 if (Opc == RISCV::QC_EXT) {
663 // QC.EXT X, width, shamt
664 // shamt is the same as Lsb
665 // width is the number of bits to extract from the Lsb
666 Msb = Msb - Lsb + 1;
667 }
668 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
669 CurDAG->getTargetConstant(Msb, DL, VT),
670 CurDAG->getTargetConstant(Lsb, DL, VT));
671 };
672
673 SDLoc DL(Node);
674 MVT VT = Node->getSimpleValueType(0);
675 const unsigned RightShAmt = N1C->getZExtValue();
676
677 // Transform (sra (shl X, C1) C2) with C1 < C2
678 // -> (SignedBitfieldExtract X, msb, lsb)
679 if (N0.getOpcode() == ISD::SHL) {
680 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
681 if (!N01C)
682 return false;
683
684 const unsigned LeftShAmt = N01C->getZExtValue();
685 // Make sure that this is a bitfield extraction (i.e., the shift-right
686 // amount can not be less than the left-shift).
687 if (LeftShAmt > RightShAmt)
688 return false;
689
690 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
691 const unsigned Msb = MsbPlusOne - 1;
692 const unsigned Lsb = RightShAmt - LeftShAmt;
693
694 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
695 ReplaceNode(Node, Sbe);
696 return true;
697 }
698
699 // Transform (sra (sext_inreg X, _), C) ->
700 // (SignedBitfieldExtract X, msb, lsb)
701 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
702 unsigned ExtSize =
703 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
704
705 // ExtSize of 32 should use sraiw via tablegen pattern.
706 if (ExtSize == 32)
707 return false;
708
709 const unsigned Msb = ExtSize - 1;
710 // If the shift-right amount is greater than Msb, this extracts only the
711 // X[Msb] bit and sign-extends it.
712 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
713
714 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
715 ReplaceNode(Node, Sbe);
716 return true;
717 }
718
719 return false;
720}
721
723 // Only supported with XAndesPerf at the moment.
724 if (!Subtarget->hasVendorXAndesPerf())
725 return false;
726
727 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
728 if (!N1C)
729 return false;
730
731 SDValue N0 = Node->getOperand(0);
732 if (!N0.hasOneUse())
733 return false;
734
735 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
736 const SDLoc &DL, MVT VT) {
737 unsigned Opc = RISCV::NDS_BFOS;
738 // If the Lsb is equal to the Msb, then the Lsb should be 0.
739 if (Lsb == Msb)
740 Lsb = 0;
741 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
742 CurDAG->getTargetConstant(Lsb, DL, VT),
743 CurDAG->getTargetConstant(Msb, DL, VT));
744 };
745
746 SDLoc DL(Node);
747 MVT VT = Node->getSimpleValueType(0);
748 const unsigned RightShAmt = N1C->getZExtValue();
749
750 // Transform (sra (shl X, C1) C2) with C1 > C2
751 // -> (NDS.BFOS X, lsb, msb)
752 if (N0.getOpcode() == ISD::SHL) {
753 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
754 if (!N01C)
755 return false;
756
757 const unsigned LeftShAmt = N01C->getZExtValue();
758 // Make sure that this is a bitfield insertion (i.e., the shift-right
759 // amount should be less than the left-shift).
760 if (LeftShAmt <= RightShAmt)
761 return false;
762
763 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
764 const unsigned Msb = MsbPlusOne - 1;
765 const unsigned Lsb = LeftShAmt - RightShAmt;
766
767 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
768 ReplaceNode(Node, Sbi);
769 return true;
770 }
771
772 return false;
773}
774
776 const SDLoc &DL, MVT VT,
777 SDValue X, unsigned Msb,
778 unsigned Lsb) {
779 unsigned Opc;
780
781 if (Subtarget->hasVendorXTHeadBb()) {
782 Opc = RISCV::TH_EXTU;
783 } else if (Subtarget->hasVendorXAndesPerf()) {
784 Opc = RISCV::NDS_BFOZ;
785 } else if (Subtarget->hasVendorXqcibm()) {
786 Opc = RISCV::QC_EXTU;
787 // QC.EXTU X, width, shamt
788 // shamt is the same as Lsb
789 // width is the number of bits to extract from the Lsb
790 Msb = Msb - Lsb + 1;
791 } else {
792 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
793 return false;
794 }
795
796 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
797 CurDAG->getTargetConstant(Msb, DL, VT),
798 CurDAG->getTargetConstant(Lsb, DL, VT));
799 ReplaceNode(Node, Ube);
800 return true;
801}
802
804 const SDLoc &DL, MVT VT,
805 SDValue X, unsigned Msb,
806 unsigned Lsb) {
807 // Only supported with XAndesPerf at the moment.
808 if (!Subtarget->hasVendorXAndesPerf())
809 return false;
810
811 unsigned Opc = RISCV::NDS_BFOZ;
812
813 // If the Lsb is equal to the Msb, then the Lsb should be 0.
814 if (Lsb == Msb)
815 Lsb = 0;
816 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
817 CurDAG->getTargetConstant(Lsb, DL, VT),
818 CurDAG->getTargetConstant(Msb, DL, VT));
819 ReplaceNode(Node, Ubi);
820 return true;
821}
822
824 // Target does not support indexed loads.
825 if (!Subtarget->hasVendorXTHeadMemIdx())
826 return false;
827
830 if (AM == ISD::UNINDEXED)
831 return false;
832
834 if (!C)
835 return false;
836
837 EVT LoadVT = Ld->getMemoryVT();
838 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
839 "Unexpected addressing mode");
840 bool IsPre = AM == ISD::PRE_INC;
841 bool IsPost = AM == ISD::POST_INC;
842 int64_t Offset = C->getSExtValue();
843
844 // The constants that can be encoded in the THeadMemIdx instructions
845 // are of the form (sign_extend(imm5) << imm2).
846 unsigned Shift;
847 for (Shift = 0; Shift < 4; Shift++)
848 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
849 break;
850
851 // Constant cannot be encoded.
852 if (Shift == 4)
853 return false;
854
855 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
856 unsigned Opcode;
857 if (LoadVT == MVT::i8 && IsPre)
858 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
859 else if (LoadVT == MVT::i8 && IsPost)
860 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
861 else if (LoadVT == MVT::i16 && IsPre)
862 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
863 else if (LoadVT == MVT::i16 && IsPost)
864 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
865 else if (LoadVT == MVT::i32 && IsPre)
866 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
867 else if (LoadVT == MVT::i32 && IsPost)
868 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
869 else if (LoadVT == MVT::i64 && IsPre)
870 Opcode = RISCV::TH_LDIB;
871 else if (LoadVT == MVT::i64 && IsPost)
872 Opcode = RISCV::TH_LDIA;
873 else
874 return false;
875
876 EVT Ty = Ld->getOffset().getValueType();
877 SDValue Ops[] = {
878 Ld->getBasePtr(),
879 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
880 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
881 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
882 Ld->getValueType(1), MVT::Other, Ops);
883
884 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
885 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
886
887 ReplaceNode(Node, New);
888
889 return true;
890}
891
892static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
893 SDValue Lo, SDValue Hi) {
894 SDValue Ops[] = {
895 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
896 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
897 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
898
899 return SDValue(
900 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
901}
902
903// Helper to extract Lo and Hi values from a GPR pair.
904static std::pair<SDValue, SDValue>
906 SDValue Lo =
907 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
908 SDValue Hi =
909 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
910 return {Lo, Hi};
911}
912
913// Try to match WMACC pattern: ADDD where one operand pair comes from a
914// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
916 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
917
918 SDValue Op0Lo = Node->getOperand(0);
919 SDValue Op0Hi = Node->getOperand(1);
920 SDValue Op1Lo = Node->getOperand(2);
921 SDValue Op1Hi = Node->getOperand(3);
922
923 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
924 unsigned Opc = Lo.getOpcode();
925 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
926 Opc != RISCVISD::WMULSU)
927 return false;
928 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
929 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
930 };
931
932 SDNode *MulNode = nullptr;
933 SDValue AddLo, AddHi;
934
935 // Check if first operand pair is a supported multiply with single use.
936 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
937 MulNode = Op0Lo.getNode();
938 AddLo = Op1Lo;
939 AddHi = Op1Hi;
940 }
941 // ADDD is commutative. Check if second operand pair is a supported multiply
942 // with single use.
943 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
944 MulNode = Op1Lo.getNode();
945 AddLo = Op0Lo;
946 AddHi = Op0Hi;
947 } else {
948 return false;
949 }
950
951 unsigned Opc;
952 switch (MulNode->getOpcode()) {
953 default:
954 llvm_unreachable("Unexpected multiply opcode");
955 case ISD::UMUL_LOHI:
956 Opc = RISCV::WMACCU;
957 break;
958 case ISD::SMUL_LOHI:
959 Opc = RISCV::WMACC;
960 break;
961 case RISCVISD::WMULSU:
962 Opc = RISCV::WMACCSU;
963 break;
964 }
965
966 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
967
968 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
969 SDValue M0 = MulNode->getOperand(0);
970 SDValue M1 = MulNode->getOperand(1);
971 MachineSDNode *New =
972 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
973
974 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
977 CurDAG->RemoveDeadNode(Node);
978 return true;
979}
980
981static Register getTileReg(uint64_t TileNum) {
982 assert(TileNum <= 15 && "Invalid tile number");
983 return RISCV::T0 + TileNum;
984}
985
987 if (!Subtarget->hasVInstructions())
988 return;
989
990 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
991
992 SDLoc DL(Node);
993 unsigned IntNo = Node->getConstantOperandVal(1);
994
995 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
996 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
997 "Unexpected vsetvli intrinsic");
998
999 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
1000 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
1001 SDValue SEWOp =
1002 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
1003 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
1004 Node->getOperand(4), Node->getOperand(5),
1005 Node->getOperand(8), SEWOp,
1006 Node->getOperand(0)};
1007
1008 unsigned Opcode;
1009 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
1010 switch (LMulSDNode->getSExtValue()) {
1011 case 5:
1012 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1013 : RISCV::PseudoSF_VC_I_SE_MF8;
1014 break;
1015 case 6:
1016 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1017 : RISCV::PseudoSF_VC_I_SE_MF4;
1018 break;
1019 case 7:
1020 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1021 : RISCV::PseudoSF_VC_I_SE_MF2;
1022 break;
1023 case 0:
1024 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1025 : RISCV::PseudoSF_VC_I_SE_M1;
1026 break;
1027 case 1:
1028 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1029 : RISCV::PseudoSF_VC_I_SE_M2;
1030 break;
1031 case 2:
1032 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1033 : RISCV::PseudoSF_VC_I_SE_M4;
1034 break;
1035 case 3:
1036 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1037 : RISCV::PseudoSF_VC_I_SE_M8;
1038 break;
1039 }
1040
1041 ReplaceNode(Node, CurDAG->getMachineNode(
1042 Opcode, DL, Node->getSimpleValueType(0), Operands));
1043}
1044
1045static unsigned getSegInstNF(unsigned Intrinsic) {
1046#define INST_NF_CASE(NAME, NF) \
1047 case Intrinsic::riscv_##NAME##NF: \
1048 return NF;
1049#define INST_NF_CASE_MASK(NAME, NF) \
1050 case Intrinsic::riscv_##NAME##NF##_mask: \
1051 return NF;
1052#define INST_NF_CASE_FF(NAME, NF) \
1053 case Intrinsic::riscv_##NAME##NF##ff: \
1054 return NF;
1055#define INST_NF_CASE_FF_MASK(NAME, NF) \
1056 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1057 return NF;
1058#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1059 MACRO_NAME(NAME, 2) \
1060 MACRO_NAME(NAME, 3) \
1061 MACRO_NAME(NAME, 4) \
1062 MACRO_NAME(NAME, 5) \
1063 MACRO_NAME(NAME, 6) \
1064 MACRO_NAME(NAME, 7) \
1065 MACRO_NAME(NAME, 8)
1066#define INST_ALL_NF_CASE(NAME) \
1067 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1068 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1069#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1070 INST_ALL_NF_CASE(NAME) \
1071 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1072 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1073 switch (Intrinsic) {
1074 default:
1075 llvm_unreachable("Unexpected segment load/store intrinsic");
1077 INST_ALL_NF_CASE(vlsseg)
1078 INST_ALL_NF_CASE(vloxseg)
1079 INST_ALL_NF_CASE(vluxseg)
1080 INST_ALL_NF_CASE(vsseg)
1081 INST_ALL_NF_CASE(vssseg)
1082 INST_ALL_NF_CASE(vsoxseg)
1083 INST_ALL_NF_CASE(vsuxseg)
1084 }
1085}
1086
1087static bool isApplicableToPLIOrPLUI(int Val) {
1088 // Check if the immediate is packed i8 or i10
1089 int16_t Bit31To16 = Val >> 16;
1090 int16_t Bit15To0 = Val;
1091 int8_t Bit15To8 = Bit15To0 >> 8;
1092 int8_t Bit7To0 = Val;
1093 if (Bit31To16 != Bit15To0)
1094 return false;
1095
1096 return isInt<10>(Bit15To0) || isShiftedInt<10, 6>(Bit15To0) ||
1097 Bit15To8 == Bit7To0;
1098}
1099
1101 // If we have a custom node, we have already selected.
1102 if (Node->isMachineOpcode()) {
1103 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1104 Node->setNodeId(-1);
1105 return;
1106 }
1107
1108 // Instruction Selection not handled by the auto-generated tablegen selection
1109 // should be handled here.
1110 unsigned Opcode = Node->getOpcode();
1111 MVT XLenVT = Subtarget->getXLenVT();
1112 SDLoc DL(Node);
1113 MVT VT = Node->getSimpleValueType(0);
1114
1115 bool HasBitTest = Subtarget->hasBEXTILike();
1116
1117 switch (Opcode) {
1118 case ISD::Constant: {
1119 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1120 auto *ConstNode = cast<ConstantSDNode>(Node);
1121 if (ConstNode->isZero()) {
1122 SDValue New =
1123 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1124 ReplaceNode(Node, New.getNode());
1125 return;
1126 }
1127 int64_t Imm = ConstNode->getSExtValue();
1128 // If only the lower 8 bits are used, try to convert this to a simm6 by
1129 // sign-extending bit 7. This is neutral without the C extension, and
1130 // allows C.LI to be used if C is present.
1131 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1133 Imm = SignExtend64<8>(Imm);
1134 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1135 // by sign extending bit 15.
1136 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1138 Imm = SignExtend64<16>(Imm);
1139
1140 // If the upper XLen-16 bits are not used, the lower 2 bytes are the same,
1141 // and we can't use li, convert to an xlen splat so we can use pli.b.
1142 if (Subtarget->hasStdExtP() && !isInt<12>(Imm) &&
1143 (Imm & 0xff) == ((Imm >> 8) & 0xff) && hasAllHUsers(Node)) {
1144 // Splat the lower 16 bits to XLen. Sign extend for RV32.
1145 uint64_t Splat = Imm & 0xffff;
1146 Splat = (Splat << 16) | Splat;
1147 if (VT == MVT::i64)
1148 Imm = Splat << 32 | Splat;
1149 else
1150 Imm = SignExtend64<32>(Splat);
1151 } else {
1152 // If the upper 32-bits are not used try to convert this into a simm32 by
1153 // sign extending bit 32.
1154 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1155 Imm = SignExtend64<32>(Imm);
1156
1157 if (VT == MVT::i64 && !isInt<12>(Imm) && !isShiftedInt<20, 12>(Imm) &&
1158 Subtarget->hasStdExtP() && isApplicableToPLIOrPLUI(Imm) &&
1159 hasAllWUsers(Node)) {
1160 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers,
1161 // we can simply copy lower 32 bits to higher 32 bits to make it able to
1162 // rematerialize to PLI_B or PLI_H
1163 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1164 }
1165 }
1166
1167 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1168 return;
1169 }
1170 case ISD::ConstantFP: {
1171 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1172
1173 bool Is64Bit = Subtarget->is64Bit();
1174 bool HasZdinx = Subtarget->hasStdExtZdinx();
1175
1176 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1177 SDValue Imm;
1178 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1179 // create an integer immediate.
1180 if (APF.isPosZero() || NegZeroF64) {
1181 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1182 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1183 else
1184 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1185 } else {
1186 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1187 *Subtarget);
1188 }
1189
1190 unsigned Opc;
1191 switch (VT.SimpleTy) {
1192 default:
1193 llvm_unreachable("Unexpected size");
1194 case MVT::bf16:
1195 assert(Subtarget->hasStdExtZfbfmin());
1196 Opc = RISCV::FMV_H_X;
1197 break;
1198 case MVT::f16:
1199 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1200 break;
1201 case MVT::f32:
1202 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1203 break;
1204 case MVT::f64:
1205 // For RV32, we can't move from a GPR, we need to convert instead. This
1206 // should only happen for +0.0 and -0.0.
1207 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1208 if (HasZdinx)
1209 Opc = RISCV::COPY;
1210 else
1211 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1212 break;
1213 }
1214
1215 SDNode *Res;
1216 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1217 Res =
1218 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1219 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1220 Res =
1221 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1222 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1223 Res = CurDAG->getMachineNode(
1224 Opc, DL, VT, Imm,
1225 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1226 else
1227 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1228
1229 // For f64 -0.0, we need to insert a fneg.d idiom.
1230 if (NegZeroF64) {
1231 Opc = RISCV::FSGNJN_D;
1232 if (HasZdinx)
1233 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1234 Res =
1235 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1236 }
1237
1238 ReplaceNode(Node, Res);
1239 return;
1240 }
1241 case RISCVISD::BuildGPRPair:
1242 case RISCVISD::BuildPairF64:
1243 case RISCVISD::BuildPairGPRVec: {
1244 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1245 break;
1246
1247 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::BuildPairF64) &&
1248 "BuildPairF64 only handled here on rv32i_zdinx");
1249
1250 SDValue N =
1251 buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
1252 ReplaceNode(Node, N.getNode());
1253 return;
1254 }
1255 case RISCVISD::SplitGPRPair:
1256 case RISCVISD::SplitF64:
1257 case RISCVISD::SplitGPRVec: {
1258 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1259 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::SplitF64) &&
1260 "SplitF64 only handled here on rv32i_zdinx");
1261
1262 if (!SDValue(Node, 0).use_empty()) {
1263 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1264 Node->getValueType(0),
1265 Node->getOperand(0));
1266 ReplaceUses(SDValue(Node, 0), Lo);
1267 }
1268
1269 if (!SDValue(Node, 1).use_empty()) {
1270 SDValue Hi = CurDAG->getTargetExtractSubreg(
1271 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1272 ReplaceUses(SDValue(Node, 1), Hi);
1273 }
1274
1275 CurDAG->RemoveDeadNode(Node);
1276 return;
1277 }
1278
1279 if (!Subtarget->hasStdExtZfa())
1280 break;
1281 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1282 "Unexpected subtarget");
1283
1284 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1285 if (!SDValue(Node, 0).use_empty()) {
1286 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1287 Node->getOperand(0));
1288 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1289 }
1290 if (!SDValue(Node, 1).use_empty()) {
1291 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1292 Node->getOperand(0));
1293 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1294 }
1295
1296 CurDAG->RemoveDeadNode(Node);
1297 return;
1298 }
1299 case ISD::SHL: {
1300 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1301 if (!N1C)
1302 break;
1303 SDValue N0 = Node->getOperand(0);
1304 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1306 break;
1307 unsigned ShAmt = N1C->getZExtValue();
1308 uint64_t Mask = N0.getConstantOperandVal(1);
1309
1310 if (isShiftedMask_64(Mask)) {
1311 unsigned XLen = Subtarget->getXLen();
1312 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1313 unsigned TrailingZeros = llvm::countr_zero(Mask);
1314 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1315 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1316 // where C2 has 32 leading zeros and C3 trailing zeros.
1317 SDNode *SRLIW = CurDAG->getMachineNode(
1318 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1319 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1320 SDNode *SLLI = CurDAG->getMachineNode(
1321 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1322 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1323 ReplaceNode(Node, SLLI);
1324 return;
1325 }
1326 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1327 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1328 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1329 // where C2 has C4 leading zeros and no trailing zeros.
1330 // This is profitable if the "and" was to be lowered to
1331 // (srli (slli X, C4), C4) and not (andi X, C2).
1332 // For "LeadingZeros == 32":
1333 // - with Zba it's just (slli.uw X, C)
1334 // - without Zba a tablegen pattern applies the very same
1335 // transform as we would have done here
1336 SDNode *SLLI = CurDAG->getMachineNode(
1337 RISCV::SLLI, DL, VT, N0.getOperand(0),
1338 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1339 SDNode *SRLI = CurDAG->getMachineNode(
1340 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1341 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1342 ReplaceNode(Node, SRLI);
1343 return;
1344 }
1345 }
1346 break;
1347 }
1348 case ISD::SRL: {
1349 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1350 if (!N1C)
1351 break;
1352 SDValue N0 = Node->getOperand(0);
1353 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1354 break;
1355 unsigned ShAmt = N1C->getZExtValue();
1356 uint64_t Mask = N0.getConstantOperandVal(1);
1357
1358 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1359 // 32 leading zeros and C3 trailing zeros.
1360 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1361 unsigned XLen = Subtarget->getXLen();
1362 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1363 unsigned TrailingZeros = llvm::countr_zero(Mask);
1364 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1365 SDNode *SRLIW = CurDAG->getMachineNode(
1366 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1367 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1368 SDNode *SLLI = CurDAG->getMachineNode(
1369 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1370 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1371 ReplaceNode(Node, SLLI);
1372 return;
1373 }
1374 }
1375
1376 // Optimize (srl (and X, C2), C) ->
1377 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1378 // Where C2 is a mask with C3 trailing ones.
1379 // Taking into account that the C2 may have had lower bits unset by
1380 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1381 // This pattern occurs when type legalizing right shifts for types with
1382 // less than XLen bits.
1383 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1384 if (!isMask_64(Mask))
1385 break;
1386 unsigned TrailingOnes = llvm::countr_one(Mask);
1387 if (ShAmt >= TrailingOnes)
1388 break;
1389 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1390 if (TrailingOnes == 32) {
1391 SDNode *SRLI = CurDAG->getMachineNode(
1392 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1393 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1394 ReplaceNode(Node, SRLI);
1395 return;
1396 }
1397
1398 // Only do the remaining transforms if the AND has one use.
1399 if (!N0.hasOneUse())
1400 break;
1401
1402 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1403 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1404 SDNode *BEXTI = CurDAG->getMachineNode(
1405 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1406 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1407 ReplaceNode(Node, BEXTI);
1408 return;
1409 }
1410
1411 const unsigned Msb = TrailingOnes - 1;
1412 const unsigned Lsb = ShAmt;
1413 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1414 return;
1415
1416 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1417 SDNode *SLLI =
1418 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1419 CurDAG->getTargetConstant(LShAmt, DL, VT));
1420 SDNode *SRLI = CurDAG->getMachineNode(
1421 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1422 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1423 ReplaceNode(Node, SRLI);
1424 return;
1425 }
1426 case ISD::SRA: {
1428 return;
1429
1431 return;
1432
1433 // Optimize (sra (sext_inreg X, i16), C) ->
1434 // (srai (slli X, (XLen-16), (XLen-16) + C)
1435 // And (sra (sext_inreg X, i8), C) ->
1436 // (srai (slli X, (XLen-8), (XLen-8) + C)
1437 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1438 // This transform matches the code we get without Zbb. The shifts are more
1439 // compressible, and this can help expose CSE opportunities in the sdiv by
1440 // constant optimization.
1441 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1442 if (!N1C)
1443 break;
1444 SDValue N0 = Node->getOperand(0);
1445 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1446 break;
1447 unsigned ShAmt = N1C->getZExtValue();
1448 unsigned ExtSize =
1449 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1450 // ExtSize of 32 should use sraiw via tablegen pattern.
1451 if (ExtSize >= 32 || ShAmt >= ExtSize)
1452 break;
1453 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1454 SDNode *SLLI =
1455 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1456 CurDAG->getTargetConstant(LShAmt, DL, VT));
1457 SDNode *SRAI = CurDAG->getMachineNode(
1458 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1459 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1460 ReplaceNode(Node, SRAI);
1461 return;
1462 }
1464 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1465 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1466 // This is a bitfield extract pattern where we're extracting a signed
1467 // 8-bit or 16-bit field from position C.
1468 SDValue N0 = Node->getOperand(0);
1469 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1470 break;
1471
1472 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1473 if (!ShAmtC)
1474 break;
1475
1476 unsigned ExtSize =
1477 cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits();
1478 unsigned ShAmt = ShAmtC->getZExtValue();
1479 unsigned XLen = Subtarget->getXLen();
1480
1481 // Only handle types less than 32, and make sure the shift amount is valid.
1482 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1483 break;
1484
1485 unsigned LShAmt = XLen - ExtSize - ShAmt;
1486 SDNode *SLLI =
1487 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1488 CurDAG->getTargetConstant(LShAmt, DL, VT));
1489 SDNode *SRAI = CurDAG->getMachineNode(
1490 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1491 CurDAG->getTargetConstant(XLen - ExtSize, DL, VT));
1492 ReplaceNode(Node, SRAI);
1493 return;
1494 }
1495 case ISD::OR: {
1497 return;
1498
1499 break;
1500 }
1501 case ISD::XOR:
1503 return;
1504
1505 break;
1506 case ISD::AND: {
1507 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1508 if (!N1C)
1509 break;
1510
1511 SDValue N0 = Node->getOperand(0);
1512
1513 bool LeftShift = N0.getOpcode() == ISD::SHL;
1514 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1515 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1516 if (!C)
1517 break;
1518 unsigned C2 = C->getZExtValue();
1519 unsigned XLen = Subtarget->getXLen();
1520 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1521
1522 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1523 // shift pair might offer more compression opportunities.
1524 // TODO: We could check for C extension here, but we don't have many lit
1525 // tests with the C extension enabled so not checking gets better
1526 // coverage.
1527 // TODO: What if ANDI faster than shift?
1528 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1529
1530 uint64_t C1 = N1C->getZExtValue();
1531
1532 // Clear irrelevant bits in the mask.
1533 if (LeftShift)
1535 else
1536 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1537
1538 // Some transforms should only be done if the shift has a single use or
1539 // the AND would become (srli (slli X, 32), 32)
1540 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1541
1542 SDValue X = N0.getOperand(0);
1543
1544 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1545 // with c3 leading zeros.
1546 if (!LeftShift && isMask_64(C1)) {
1547 unsigned Leading = XLen - llvm::bit_width(C1);
1548 if (C2 < Leading) {
1549 // If the number of leading zeros is C2+32 this can be SRLIW.
1550 if (C2 + 32 == Leading) {
1551 SDNode *SRLIW = CurDAG->getMachineNode(
1552 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1553 ReplaceNode(Node, SRLIW);
1554 return;
1555 }
1556
1557 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1558 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1559 //
1560 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1561 // legalized and goes through DAG combine.
1562 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1563 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1564 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1565 SDNode *SRAIW =
1566 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1567 CurDAG->getTargetConstant(31, DL, VT));
1568 SDNode *SRLIW = CurDAG->getMachineNode(
1569 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1570 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1571 ReplaceNode(Node, SRLIW);
1572 return;
1573 }
1574
1575 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1576 // available.
1577 // Transform (and (srl x, C2), C1)
1578 // -> (<bfextract> x, msb, lsb)
1579 //
1580 // Make sure to keep this below the SRLIW cases, as we always want to
1581 // prefer the more common instruction.
1582 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1583 const unsigned Lsb = C2;
1584 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1585 return;
1586
1587 // (srli (slli x, c3-c2), c3).
1588 // Skip if we could use (zext.w (sraiw X, C2)).
1589 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1590 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1591 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1592 // Also Skip if we can use bexti or th.tst.
1593 Skip |= HasBitTest && Leading == XLen - 1;
1594 if (OneUseOrZExtW && !Skip) {
1595 SDNode *SLLI = CurDAG->getMachineNode(
1596 RISCV::SLLI, DL, VT, X,
1597 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1598 SDNode *SRLI = CurDAG->getMachineNode(
1599 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1600 CurDAG->getTargetConstant(Leading, DL, VT));
1601 ReplaceNode(Node, SRLI);
1602 return;
1603 }
1604 }
1605 }
1606
1607 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1608 // shifted by c2 bits with c3 leading zeros.
1609 if (LeftShift && isShiftedMask_64(C1)) {
1610 unsigned Leading = XLen - llvm::bit_width(C1);
1611
1612 if (C2 + Leading < XLen &&
1613 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1614 // Use slli.uw when possible.
1615 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1616 SDNode *SLLI_UW =
1617 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1618 CurDAG->getTargetConstant(C2, DL, VT));
1619 ReplaceNode(Node, SLLI_UW);
1620 return;
1621 }
1622
1623 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1624 // available.
1625 // Transform (and (shl x, c2), c1)
1626 // -> (<bfinsert> x, msb, lsb)
1627 // e.g.
1628 // (and (shl x, 12), 0x00fff000)
1629 // If XLen = 32 and C2 = 12, then
1630 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1631 const unsigned Msb = XLen - Leading - 1;
1632 const unsigned Lsb = C2;
1633 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1634 return;
1635
1636 if (OneUseOrZExtW && !IsCANDI) {
1637 // (packh x0, X)
1638 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1639 SDNode *PACKH = CurDAG->getMachineNode(
1640 RISCV::PACKH, DL, VT,
1641 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1642 ReplaceNode(Node, PACKH);
1643 return;
1644 }
1645 // (srli (slli c2+c3), c3)
1646 SDNode *SLLI = CurDAG->getMachineNode(
1647 RISCV::SLLI, DL, VT, X,
1648 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1649 SDNode *SRLI = CurDAG->getMachineNode(
1650 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1651 CurDAG->getTargetConstant(Leading, DL, VT));
1652 ReplaceNode(Node, SRLI);
1653 return;
1654 }
1655 }
1656 }
1657
1658 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1659 // shifted mask with c2 leading zeros and c3 trailing zeros.
1660 if (!LeftShift && isShiftedMask_64(C1)) {
1661 unsigned Leading = XLen - llvm::bit_width(C1);
1662 unsigned Trailing = llvm::countr_zero(C1);
1663 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1664 !IsCANDI) {
1665 unsigned SrliOpc = RISCV::SRLI;
1666 // If the input is zexti32 we should use SRLIW.
1667 if (X.getOpcode() == ISD::AND &&
1668 isa<ConstantSDNode>(X.getOperand(1)) &&
1669 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1670 SrliOpc = RISCV::SRLIW;
1671 X = X.getOperand(0);
1672 }
1673 SDNode *SRLI = CurDAG->getMachineNode(
1674 SrliOpc, DL, VT, X,
1675 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1676 SDNode *SLLI = CurDAG->getMachineNode(
1677 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1678 CurDAG->getTargetConstant(Trailing, DL, VT));
1679 ReplaceNode(Node, SLLI);
1680 return;
1681 }
1682 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1683 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1684 OneUseOrZExtW && !IsCANDI) {
1685 SDNode *SRLIW = CurDAG->getMachineNode(
1686 RISCV::SRLIW, DL, VT, X,
1687 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1688 SDNode *SLLI = CurDAG->getMachineNode(
1689 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1690 CurDAG->getTargetConstant(Trailing, DL, VT));
1691 ReplaceNode(Node, SLLI);
1692 return;
1693 }
1694 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1695 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1696 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1697 SDNode *SRLI = CurDAG->getMachineNode(
1698 RISCV::SRLI, DL, VT, X,
1699 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1700 SDNode *SLLI_UW = CurDAG->getMachineNode(
1701 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1702 CurDAG->getTargetConstant(Trailing, DL, VT));
1703 ReplaceNode(Node, SLLI_UW);
1704 return;
1705 }
1706 }
1707
1708 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1709 // shifted mask with no leading zeros and c3 trailing zeros.
1710 if (LeftShift && isShiftedMask_64(C1)) {
1711 unsigned Leading = XLen - llvm::bit_width(C1);
1712 unsigned Trailing = llvm::countr_zero(C1);
1713 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1714 SDNode *SRLI = CurDAG->getMachineNode(
1715 RISCV::SRLI, DL, VT, X,
1716 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1717 SDNode *SLLI = CurDAG->getMachineNode(
1718 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1719 CurDAG->getTargetConstant(Trailing, DL, VT));
1720 ReplaceNode(Node, SLLI);
1721 return;
1722 }
1723 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1724 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1725 SDNode *SRLIW = CurDAG->getMachineNode(
1726 RISCV::SRLIW, DL, VT, X,
1727 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1728 SDNode *SLLI = CurDAG->getMachineNode(
1729 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1730 CurDAG->getTargetConstant(Trailing, DL, VT));
1731 ReplaceNode(Node, SLLI);
1732 return;
1733 }
1734
1735 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1736 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1737 Subtarget->hasStdExtZba()) {
1738 SDNode *SRLI = CurDAG->getMachineNode(
1739 RISCV::SRLI, DL, VT, X,
1740 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1741 SDNode *SLLI_UW = CurDAG->getMachineNode(
1742 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1743 CurDAG->getTargetConstant(Trailing, DL, VT));
1744 ReplaceNode(Node, SLLI_UW);
1745 return;
1746 }
1747 }
1748 }
1749
1750 const uint64_t C1 = N1C->getZExtValue();
1751
1752 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1753 N0.hasOneUse()) {
1754 unsigned C2 = N0.getConstantOperandVal(1);
1755 unsigned XLen = Subtarget->getXLen();
1756 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1757
1758 SDValue X = N0.getOperand(0);
1759
1760 // Prefer SRAIW + ANDI when possible.
1761 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1762 X.getOpcode() == ISD::SHL &&
1763 isa<ConstantSDNode>(X.getOperand(1)) &&
1764 X.getConstantOperandVal(1) == 32;
1765 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1766 // mask with c3 leading zeros and c2 is larger than c3.
1767 if (isMask_64(C1) && !Skip) {
1768 unsigned Leading = XLen - llvm::bit_width(C1);
1769 if (C2 > Leading) {
1770 SDNode *SRAI = CurDAG->getMachineNode(
1771 RISCV::SRAI, DL, VT, X,
1772 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1773 SDNode *SRLI = CurDAG->getMachineNode(
1774 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1775 CurDAG->getTargetConstant(Leading, DL, VT));
1776 ReplaceNode(Node, SRLI);
1777 return;
1778 }
1779 }
1780
1781 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1782 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1783 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1784 if (isShiftedMask_64(C1) && !Skip) {
1785 unsigned Leading = XLen - llvm::bit_width(C1);
1786 unsigned Trailing = llvm::countr_zero(C1);
1787 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1788 SDNode *SRAI = CurDAG->getMachineNode(
1789 RISCV::SRAI, DL, VT, N0.getOperand(0),
1790 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1791 SDNode *SRLI = CurDAG->getMachineNode(
1792 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1793 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1794 SDNode *SLLI = CurDAG->getMachineNode(
1795 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1796 CurDAG->getTargetConstant(Trailing, DL, VT));
1797 ReplaceNode(Node, SLLI);
1798 return;
1799 }
1800 }
1801 }
1802
1803 // If C1 masks off the upper bits only (but can't be formed as an
1804 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1805 // available.
1806 // Transform (and x, C1)
1807 // -> (<bfextract> x, msb, lsb)
1808 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1809 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1810 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1811 const unsigned Msb = llvm::bit_width(C1) - 1;
1812 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1813 return;
1814 }
1815
1817 return;
1818
1819 break;
1820 }
1821 case ISD::MUL: {
1822 // Special case for calculating (mul (and X, C2), C1) where the full product
1823 // fits in XLen bits. We can shift X left by the number of leading zeros in
1824 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1825 // product has XLen trailing zeros, putting it in the output of MULHU. This
1826 // can avoid materializing a constant in a register for C2.
1827
1828 // RHS should be a constant.
1829 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1830 if (!N1C || !N1C->hasOneUse())
1831 break;
1832
1833 // LHS should be an AND with constant.
1834 SDValue N0 = Node->getOperand(0);
1835 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1836 break;
1837
1839
1840 // Constant should be a mask.
1841 if (!isMask_64(C2))
1842 break;
1843
1844 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1845 // multiple users or the constant is a simm12. This prevents inserting a
1846 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1847 // make it more costly to materialize. Otherwise, using a SLLI might allow
1848 // it to be compressed.
1849 bool IsANDIOrZExt =
1850 isInt<12>(C2) ||
1851 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1852 // With XTHeadBb, we can use TH.EXTU.
1853 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1854 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1855 break;
1856 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1857 // the constant is a simm32.
1858 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1859 // With XTHeadBb, we can use TH.EXTU.
1860 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1861 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1862 break;
1863
1864 // We need to shift left the AND input and C1 by a total of XLen bits.
1865
1866 // How far left do we need to shift the AND input?
1867 unsigned XLen = Subtarget->getXLen();
1868 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1869
1870 // The constant gets shifted by the remaining amount unless that would
1871 // shift bits out.
1872 uint64_t C1 = N1C->getZExtValue();
1873 unsigned ConstantShift = XLen - LeadingZeros;
1874 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1875 break;
1876
1877 uint64_t ShiftedC1 = C1 << ConstantShift;
1878 // If this is RV32, we need to sign extend the constant.
1879 if (XLen == 32)
1880 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1881
1882 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1883 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1884 SDNode *SLLI =
1885 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1886 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1887 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1888 SDValue(SLLI, 0), SDValue(Imm, 0));
1889 ReplaceNode(Node, MULHU);
1890 return;
1891 }
1892 case ISD::SMUL_LOHI:
1893 case ISD::UMUL_LOHI:
1894 case RISCVISD::WMULSU:
1895 case RISCVISD::WADDU:
1896 case RISCVISD::WSUBU: {
1897 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1898 "Unexpected opcode");
1899
1900 unsigned Opc;
1901 switch (Node->getOpcode()) {
1902 default:
1903 llvm_unreachable("Unexpected opcode");
1904 case ISD::SMUL_LOHI:
1905 Opc = RISCV::WMUL;
1906 break;
1907 case ISD::UMUL_LOHI:
1908 Opc = RISCV::WMULU;
1909 break;
1910 case RISCVISD::WMULSU:
1911 Opc = RISCV::WMULSU;
1912 break;
1913 case RISCVISD::WADDU:
1914 Opc = RISCV::WADDU;
1915 break;
1916 case RISCVISD::WSUBU:
1917 Opc = RISCV::WSUBU;
1918 break;
1919 }
1920
1921 SDNode *Result = CurDAG->getMachineNode(
1922 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1923
1924 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(Result, 0));
1925 ReplaceUses(SDValue(Node, 0), Lo);
1926 ReplaceUses(SDValue(Node, 1), Hi);
1927 CurDAG->RemoveDeadNode(Node);
1928 return;
1929 }
1930 case RISCVISD::WSLL:
1931 case RISCVISD::WSLA: {
1932 // Custom select WSLL/WSLA for RV32P.
1933 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1934 "Unexpected opcode");
1935
1936 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1937
1938 SDValue ShAmt = Node->getOperand(1);
1939
1940 unsigned Opc;
1941
1942 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1943 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1944 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1945 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1946 } else {
1947 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1948 }
1949
1950 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1951 Node->getOperand(0), ShAmt);
1952
1953 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1954 ReplaceUses(SDValue(Node, 0), Lo);
1955 ReplaceUses(SDValue(Node, 1), Hi);
1956 CurDAG->RemoveDeadNode(Node);
1957 return;
1958 }
1959 case ISD::LOAD: {
1960 if (tryIndexedLoad(Node))
1961 return;
1962
1963 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1964 // We match post-incrementing load here
1966 if (Load->getAddressingMode() != ISD::POST_INC)
1967 break;
1968
1969 SDValue Chain = Node->getOperand(0);
1970 SDValue Base = Node->getOperand(1);
1971 SDValue Offset = Node->getOperand(2);
1972
1973 bool Simm12 = false;
1974 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1975
1976 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1977 int ConstantVal = ConstantOffset->getSExtValue();
1978 Simm12 = isInt<12>(ConstantVal);
1979 if (Simm12)
1980 Offset = CurDAG->getSignedTargetConstant(ConstantVal, SDLoc(Offset),
1981 Offset.getValueType());
1982 }
1983
1984 unsigned Opcode = 0;
1985 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1986 case MVT::i8:
1987 if (Simm12 && SignExtend)
1988 Opcode = RISCV::CV_LB_ri_inc;
1989 else if (Simm12 && !SignExtend)
1990 Opcode = RISCV::CV_LBU_ri_inc;
1991 else if (!Simm12 && SignExtend)
1992 Opcode = RISCV::CV_LB_rr_inc;
1993 else
1994 Opcode = RISCV::CV_LBU_rr_inc;
1995 break;
1996 case MVT::i16:
1997 if (Simm12 && SignExtend)
1998 Opcode = RISCV::CV_LH_ri_inc;
1999 else if (Simm12 && !SignExtend)
2000 Opcode = RISCV::CV_LHU_ri_inc;
2001 else if (!Simm12 && SignExtend)
2002 Opcode = RISCV::CV_LH_rr_inc;
2003 else
2004 Opcode = RISCV::CV_LHU_rr_inc;
2005 break;
2006 case MVT::i32:
2007 if (Simm12)
2008 Opcode = RISCV::CV_LW_ri_inc;
2009 else
2010 Opcode = RISCV::CV_LW_rr_inc;
2011 break;
2012 default:
2013 break;
2014 }
2015 if (!Opcode)
2016 break;
2017
2018 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
2019 Chain.getSimpleValueType(), Base,
2020 Offset, Chain));
2021 return;
2022 }
2023 break;
2024 }
2025 case RISCVISD::LD_RV32: {
2026 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2027
2029 SDValue Chain = Node->getOperand(0);
2030 SDValue Addr = Node->getOperand(1);
2032
2033 SDValue Ops[] = {Base, Offset, Chain};
2034 MachineSDNode *New = CurDAG->getMachineNode(
2035 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
2036 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2037 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2038 ReplaceUses(SDValue(Node, 0), Lo);
2039 ReplaceUses(SDValue(Node, 1), Hi);
2040 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
2041 CurDAG->RemoveDeadNode(Node);
2042 return;
2043 }
2044 case RISCVISD::SD_RV32: {
2046 SDValue Chain = Node->getOperand(0);
2047 SDValue Addr = Node->getOperand(3);
2049
2050 SDValue Lo = Node->getOperand(1);
2051 SDValue Hi = Node->getOperand(2);
2052
2053 SDValue RegPair;
2054 // Peephole to use X0_Pair for storing zero.
2056 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2057 } else {
2058 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2059 }
2060
2061 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2062 {RegPair, Base, Offset, Chain});
2063 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2064 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2065 CurDAG->RemoveDeadNode(Node);
2066 return;
2067 }
2068 case RISCVISD::ADDD:
2069 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2070 // widening multiply.
2072 return;
2073
2074 // Fall through to regular ADDD selection.
2075 [[fallthrough]];
2076 case RISCVISD::SUBD:
2077 case RISCVISD::PPAIRE_DB:
2078 case RISCVISD::WADDAU:
2079 case RISCVISD::WSUBAU: {
2080 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2081 assert(
2082 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2083 "Unexpected opcode");
2084
2085 SDValue Op0Lo = Node->getOperand(0);
2086 SDValue Op0Hi = Node->getOperand(1);
2087
2088 SDValue Op0;
2089 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2090 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2091 } else {
2092 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2093 }
2094
2095 SDValue Op1Lo = Node->getOperand(2);
2096 SDValue Op1Hi = Node->getOperand(3);
2097
2098 MachineSDNode *New;
2099 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2100 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2101 // the two 32-bit values.
2102 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2103 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2104 } else {
2105 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2106
2107 unsigned Opc;
2108 switch (Opcode) {
2109 default:
2110 llvm_unreachable("Unexpected opcode");
2111 case RISCVISD::ADDD:
2112 Opc = RISCV::ADDD;
2113 break;
2114 case RISCVISD::SUBD:
2115 Opc = RISCV::SUBD;
2116 break;
2117 case RISCVISD::PPAIRE_DB:
2118 Opc = RISCV::PPAIRE_DB;
2119 break;
2120 }
2121 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2122 }
2123
2124 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2125 ReplaceUses(SDValue(Node, 0), Lo);
2126 ReplaceUses(SDValue(Node, 1), Hi);
2127 CurDAG->RemoveDeadNode(Node);
2128 return;
2129 }
2131 unsigned IntNo = Node->getConstantOperandVal(0);
2132 switch (IntNo) {
2133 // By default we do not custom select any intrinsic.
2134 default:
2135 break;
2136 case Intrinsic::riscv_vmsgeu:
2137 case Intrinsic::riscv_vmsge: {
2138 SDValue Src1 = Node->getOperand(1);
2139 SDValue Src2 = Node->getOperand(2);
2140 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2141 bool IsCmpConstant = false;
2142 bool IsCmpMinimum = false;
2143 // Only custom select scalar second operand.
2144 if (Src2.getValueType() != XLenVT)
2145 break;
2146 // Small constants are handled with patterns.
2147 int64_t CVal = 0;
2148 MVT Src1VT = Src1.getSimpleValueType();
2149 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2150 IsCmpConstant = true;
2151 CVal = C->getSExtValue();
2152 if (CVal >= -15 && CVal <= 16) {
2153 if (!IsUnsigned || CVal != 0)
2154 break;
2155 IsCmpMinimum = true;
2156 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2157 Src1VT.getScalarSizeInBits())
2158 .getSExtValue()) {
2159 IsCmpMinimum = true;
2160 }
2161 }
2162 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2163 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2164 default:
2165 llvm_unreachable("Unexpected LMUL!");
2166#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2167 case RISCVVType::lmulenum: \
2168 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2169 : RISCV::PseudoVMSLT_VX_##suffix; \
2170 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2171 : RISCV::PseudoVMSGT_VX_##suffix; \
2172 break;
2173 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2174 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2175 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2176 CASE_VMSLT_OPCODES(LMUL_1, M1)
2177 CASE_VMSLT_OPCODES(LMUL_2, M2)
2178 CASE_VMSLT_OPCODES(LMUL_4, M4)
2179 CASE_VMSLT_OPCODES(LMUL_8, M8)
2180#undef CASE_VMSLT_OPCODES
2181 }
2182 // Mask operations use the LMUL from the mask type.
2183 switch (RISCVTargetLowering::getLMUL(VT)) {
2184 default:
2185 llvm_unreachable("Unexpected LMUL!");
2186#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2187 case RISCVVType::lmulenum: \
2188 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2189 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2190 break;
2191 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2192 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2193 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2194 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2195 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2196 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2197 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2198#undef CASE_VMNAND_VMSET_OPCODES
2199 }
2200 SDValue SEW = CurDAG->getTargetConstant(
2201 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2202 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2203 SDValue VL;
2204 selectVLOp(Node->getOperand(3), VL);
2205
2206 // If vmsge(u) with minimum value, expand it to vmset.
2207 if (IsCmpMinimum) {
2209 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2210 return;
2211 }
2212
2213 if (IsCmpConstant) {
2214 SDValue Imm =
2215 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2216
2217 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2218 {Src1, Imm, VL, SEW}));
2219 return;
2220 }
2221
2222 // Expand to
2223 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2224 SDValue Cmp = SDValue(
2225 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2226 0);
2227 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2228 {Cmp, Cmp, VL, MaskSEW}));
2229 return;
2230 }
2231 case Intrinsic::riscv_vmsgeu_mask:
2232 case Intrinsic::riscv_vmsge_mask: {
2233 SDValue Src1 = Node->getOperand(2);
2234 SDValue Src2 = Node->getOperand(3);
2235 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2236 bool IsCmpConstant = false;
2237 bool IsCmpMinimum = false;
2238 // Only custom select scalar second operand.
2239 if (Src2.getValueType() != XLenVT)
2240 break;
2241 // Small constants are handled with patterns.
2242 MVT Src1VT = Src1.getSimpleValueType();
2243 int64_t CVal = 0;
2244 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2245 IsCmpConstant = true;
2246 CVal = C->getSExtValue();
2247 if (CVal >= -15 && CVal <= 16) {
2248 if (!IsUnsigned || CVal != 0)
2249 break;
2250 IsCmpMinimum = true;
2251 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2252 Src1VT.getScalarSizeInBits())
2253 .getSExtValue()) {
2254 IsCmpMinimum = true;
2255 }
2256 }
2257 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2258 VMOROpcode, VMSGTMaskOpcode;
2259 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2260 default:
2261 llvm_unreachable("Unexpected LMUL!");
2262#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2263 case RISCVVType::lmulenum: \
2264 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2265 : RISCV::PseudoVMSLT_VX_##suffix; \
2266 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2267 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2268 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2269 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2270 break;
2271 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2272 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2273 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2274 CASE_VMSLT_OPCODES(LMUL_1, M1)
2275 CASE_VMSLT_OPCODES(LMUL_2, M2)
2276 CASE_VMSLT_OPCODES(LMUL_4, M4)
2277 CASE_VMSLT_OPCODES(LMUL_8, M8)
2278#undef CASE_VMSLT_OPCODES
2279 }
2280 // Mask operations use the LMUL from the mask type.
2281 switch (RISCVTargetLowering::getLMUL(VT)) {
2282 default:
2283 llvm_unreachable("Unexpected LMUL!");
2284#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2285 case RISCVVType::lmulenum: \
2286 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2287 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2288 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2289 break;
2290 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2291 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2292 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2297#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2298 }
2299 SDValue SEW = CurDAG->getTargetConstant(
2300 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2301 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2302 SDValue VL;
2303 selectVLOp(Node->getOperand(5), VL);
2304 SDValue MaskedOff = Node->getOperand(1);
2305 SDValue Mask = Node->getOperand(4);
2306
2307 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2308 if (IsCmpMinimum) {
2309 // We don't need vmor if the MaskedOff and the Mask are the same
2310 // value.
2311 if (Mask == MaskedOff) {
2312 ReplaceUses(Node, Mask.getNode());
2313 return;
2314 }
2316 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2317 {Mask, MaskedOff, VL, MaskSEW}));
2318 return;
2319 }
2320
2321 // If the MaskedOff value and the Mask are the same value use
2322 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2323 // This avoids needing to copy v0 to vd before starting the next sequence.
2324 if (Mask == MaskedOff) {
2325 SDValue Cmp = SDValue(
2326 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2327 0);
2328 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2329 {Mask, Cmp, VL, MaskSEW}));
2330 return;
2331 }
2332
2333 SDValue PolicyOp =
2334 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2335
2336 if (IsCmpConstant) {
2337 SDValue Imm =
2338 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2339
2340 ReplaceNode(Node, CurDAG->getMachineNode(
2341 VMSGTMaskOpcode, DL, VT,
2342 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2343 return;
2344 }
2345
2346 // Otherwise use
2347 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2348 // The result is mask undisturbed.
2349 // We use the same instructions to emulate mask agnostic behavior, because
2350 // the agnostic result can be either undisturbed or all 1.
2351 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2352 {MaskedOff, Src1, Src2, Mask,
2353 VL, SEW, PolicyOp}),
2354 0);
2355 // vmxor.mm vd, vd, v0 is used to update active value.
2356 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2357 {Cmp, Mask, VL, MaskSEW}));
2358 return;
2359 }
2360 case Intrinsic::riscv_vsetvli:
2361 case Intrinsic::riscv_vsetvlimax:
2362 return selectVSETVLI(Node);
2363 case Intrinsic::riscv_sf_vsettnt:
2364 case Intrinsic::riscv_sf_vsettm:
2365 case Intrinsic::riscv_sf_vsettk:
2366 return selectXSfmmVSET(Node);
2367 }
2368 break;
2369 }
2371 unsigned IntNo = Node->getConstantOperandVal(1);
2372 switch (IntNo) {
2373 // By default we do not custom select any intrinsic.
2374 default:
2375 break;
2376 case Intrinsic::riscv_vlseg2:
2377 case Intrinsic::riscv_vlseg3:
2378 case Intrinsic::riscv_vlseg4:
2379 case Intrinsic::riscv_vlseg5:
2380 case Intrinsic::riscv_vlseg6:
2381 case Intrinsic::riscv_vlseg7:
2382 case Intrinsic::riscv_vlseg8: {
2383 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2384 /*IsStrided*/ false);
2385 return;
2386 }
2387 case Intrinsic::riscv_vlseg2_mask:
2388 case Intrinsic::riscv_vlseg3_mask:
2389 case Intrinsic::riscv_vlseg4_mask:
2390 case Intrinsic::riscv_vlseg5_mask:
2391 case Intrinsic::riscv_vlseg6_mask:
2392 case Intrinsic::riscv_vlseg7_mask:
2393 case Intrinsic::riscv_vlseg8_mask: {
2394 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2395 /*IsStrided*/ false);
2396 return;
2397 }
2398 case Intrinsic::riscv_vlsseg2:
2399 case Intrinsic::riscv_vlsseg3:
2400 case Intrinsic::riscv_vlsseg4:
2401 case Intrinsic::riscv_vlsseg5:
2402 case Intrinsic::riscv_vlsseg6:
2403 case Intrinsic::riscv_vlsseg7:
2404 case Intrinsic::riscv_vlsseg8: {
2405 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2406 /*IsStrided*/ true);
2407 return;
2408 }
2409 case Intrinsic::riscv_vlsseg2_mask:
2410 case Intrinsic::riscv_vlsseg3_mask:
2411 case Intrinsic::riscv_vlsseg4_mask:
2412 case Intrinsic::riscv_vlsseg5_mask:
2413 case Intrinsic::riscv_vlsseg6_mask:
2414 case Intrinsic::riscv_vlsseg7_mask:
2415 case Intrinsic::riscv_vlsseg8_mask: {
2416 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2417 /*IsStrided*/ true);
2418 return;
2419 }
2420 case Intrinsic::riscv_vloxseg2:
2421 case Intrinsic::riscv_vloxseg3:
2422 case Intrinsic::riscv_vloxseg4:
2423 case Intrinsic::riscv_vloxseg5:
2424 case Intrinsic::riscv_vloxseg6:
2425 case Intrinsic::riscv_vloxseg7:
2426 case Intrinsic::riscv_vloxseg8:
2427 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2428 /*IsOrdered*/ true);
2429 return;
2430 case Intrinsic::riscv_vluxseg2:
2431 case Intrinsic::riscv_vluxseg3:
2432 case Intrinsic::riscv_vluxseg4:
2433 case Intrinsic::riscv_vluxseg5:
2434 case Intrinsic::riscv_vluxseg6:
2435 case Intrinsic::riscv_vluxseg7:
2436 case Intrinsic::riscv_vluxseg8:
2437 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2438 /*IsOrdered*/ false);
2439 return;
2440 case Intrinsic::riscv_vloxseg2_mask:
2441 case Intrinsic::riscv_vloxseg3_mask:
2442 case Intrinsic::riscv_vloxseg4_mask:
2443 case Intrinsic::riscv_vloxseg5_mask:
2444 case Intrinsic::riscv_vloxseg6_mask:
2445 case Intrinsic::riscv_vloxseg7_mask:
2446 case Intrinsic::riscv_vloxseg8_mask:
2447 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2448 /*IsOrdered*/ true);
2449 return;
2450 case Intrinsic::riscv_vluxseg2_mask:
2451 case Intrinsic::riscv_vluxseg3_mask:
2452 case Intrinsic::riscv_vluxseg4_mask:
2453 case Intrinsic::riscv_vluxseg5_mask:
2454 case Intrinsic::riscv_vluxseg6_mask:
2455 case Intrinsic::riscv_vluxseg7_mask:
2456 case Intrinsic::riscv_vluxseg8_mask:
2457 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2458 /*IsOrdered*/ false);
2459 return;
2460 case Intrinsic::riscv_vlseg8ff:
2461 case Intrinsic::riscv_vlseg7ff:
2462 case Intrinsic::riscv_vlseg6ff:
2463 case Intrinsic::riscv_vlseg5ff:
2464 case Intrinsic::riscv_vlseg4ff:
2465 case Intrinsic::riscv_vlseg3ff:
2466 case Intrinsic::riscv_vlseg2ff: {
2467 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2468 return;
2469 }
2470 case Intrinsic::riscv_vlseg8ff_mask:
2471 case Intrinsic::riscv_vlseg7ff_mask:
2472 case Intrinsic::riscv_vlseg6ff_mask:
2473 case Intrinsic::riscv_vlseg5ff_mask:
2474 case Intrinsic::riscv_vlseg4ff_mask:
2475 case Intrinsic::riscv_vlseg3ff_mask:
2476 case Intrinsic::riscv_vlseg2ff_mask: {
2477 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2478 return;
2479 }
2480 case Intrinsic::riscv_vloxei:
2481 case Intrinsic::riscv_vloxei_mask:
2482 case Intrinsic::riscv_vluxei:
2483 case Intrinsic::riscv_vluxei_mask: {
2484 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2485 IntNo == Intrinsic::riscv_vluxei_mask;
2486 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2487 IntNo == Intrinsic::riscv_vloxei_mask;
2488
2489 MVT VT = Node->getSimpleValueType(0);
2490 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2491
2492 unsigned CurOp = 2;
2493 SmallVector<SDValue, 8> Operands;
2494 Operands.push_back(Node->getOperand(CurOp++));
2495
2496 MVT IndexVT;
2497 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2498 /*IsStridedOrIndexed*/ true, Operands,
2499 /*IsLoad=*/true, &IndexVT);
2500
2502 "Element count mismatch");
2503
2506 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2507 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2508 reportFatalUsageError("The V extension does not support EEW=64 for "
2509 "index values when XLEN=32");
2510 }
2511 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2512 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2513 static_cast<unsigned>(IndexLMUL));
2514 MachineSDNode *Load =
2515 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2516
2517 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2518
2519 ReplaceNode(Node, Load);
2520 return;
2521 }
2522 case Intrinsic::riscv_vlm:
2523 case Intrinsic::riscv_vle:
2524 case Intrinsic::riscv_vle_mask:
2525 case Intrinsic::riscv_vlse:
2526 case Intrinsic::riscv_vlse_mask: {
2527 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2528 IntNo == Intrinsic::riscv_vlse_mask;
2529 bool IsStrided =
2530 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2531
2532 MVT VT = Node->getSimpleValueType(0);
2533 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2534
2535 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2536 // operand at the IR level. In pseudos, it has both policy and
2537 // passthru operands. The passthru operand is needed to track the
2538 // "tail undefined" state, and the policy is there just for
2539 // consistency - it will always be "don't care" for the
2540 // unmasked form.
2541 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2542 unsigned CurOp = 2;
2543 SmallVector<SDValue, 8> Operands;
2544 if (HasPassthruOperand)
2545 Operands.push_back(Node->getOperand(CurOp++));
2546 else {
2547 // We eagerly lower to implicit_def (instead of undef), as we
2548 // otherwise fail to select nodes such as: nxv1i1 = undef
2549 SDNode *Passthru =
2550 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2551 Operands.push_back(SDValue(Passthru, 0));
2552 }
2553 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2554 Operands, /*IsLoad=*/true);
2555
2557 const RISCV::VLEPseudo *P =
2558 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2559 static_cast<unsigned>(LMUL));
2560 MachineSDNode *Load =
2561 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2562
2563 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2564
2565 ReplaceNode(Node, Load);
2566 return;
2567 }
2568 case Intrinsic::riscv_vleff:
2569 case Intrinsic::riscv_vleff_mask: {
2570 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2571
2572 MVT VT = Node->getSimpleValueType(0);
2573 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2574
2575 unsigned CurOp = 2;
2576 SmallVector<SDValue, 7> Operands;
2577 Operands.push_back(Node->getOperand(CurOp++));
2578 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2579 /*IsStridedOrIndexed*/ false, Operands,
2580 /*IsLoad=*/true);
2581
2583 const RISCV::VLEPseudo *P =
2584 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2585 Log2SEW, static_cast<unsigned>(LMUL));
2586 MachineSDNode *Load = CurDAG->getMachineNode(
2587 P->Pseudo, DL, Node->getVTList(), Operands);
2588 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2589
2590 ReplaceNode(Node, Load);
2591 return;
2592 }
2593 case Intrinsic::riscv_nds_vln:
2594 case Intrinsic::riscv_nds_vln_mask:
2595 case Intrinsic::riscv_nds_vlnu:
2596 case Intrinsic::riscv_nds_vlnu_mask: {
2597 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2598 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2599 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2600 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2601
2602 MVT VT = Node->getSimpleValueType(0);
2603 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2604 unsigned CurOp = 2;
2605 SmallVector<SDValue, 8> Operands;
2606
2607 Operands.push_back(Node->getOperand(CurOp++));
2608 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2609 /*IsStridedOrIndexed=*/false, Operands,
2610 /*IsLoad=*/true);
2611
2613 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2614 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2615 MachineSDNode *Load =
2616 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2617
2618 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2619 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2620
2621 ReplaceNode(Node, Load);
2622 return;
2623 }
2624 }
2625 break;
2626 }
2627 case ISD::INTRINSIC_VOID: {
2628 unsigned IntNo = Node->getConstantOperandVal(1);
2629 switch (IntNo) {
2630 case Intrinsic::riscv_vsseg2:
2631 case Intrinsic::riscv_vsseg3:
2632 case Intrinsic::riscv_vsseg4:
2633 case Intrinsic::riscv_vsseg5:
2634 case Intrinsic::riscv_vsseg6:
2635 case Intrinsic::riscv_vsseg7:
2636 case Intrinsic::riscv_vsseg8: {
2637 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2638 /*IsStrided*/ false);
2639 return;
2640 }
2641 case Intrinsic::riscv_vsseg2_mask:
2642 case Intrinsic::riscv_vsseg3_mask:
2643 case Intrinsic::riscv_vsseg4_mask:
2644 case Intrinsic::riscv_vsseg5_mask:
2645 case Intrinsic::riscv_vsseg6_mask:
2646 case Intrinsic::riscv_vsseg7_mask:
2647 case Intrinsic::riscv_vsseg8_mask: {
2648 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2649 /*IsStrided*/ false);
2650 return;
2651 }
2652 case Intrinsic::riscv_vssseg2:
2653 case Intrinsic::riscv_vssseg3:
2654 case Intrinsic::riscv_vssseg4:
2655 case Intrinsic::riscv_vssseg5:
2656 case Intrinsic::riscv_vssseg6:
2657 case Intrinsic::riscv_vssseg7:
2658 case Intrinsic::riscv_vssseg8: {
2659 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2660 /*IsStrided*/ true);
2661 return;
2662 }
2663 case Intrinsic::riscv_vssseg2_mask:
2664 case Intrinsic::riscv_vssseg3_mask:
2665 case Intrinsic::riscv_vssseg4_mask:
2666 case Intrinsic::riscv_vssseg5_mask:
2667 case Intrinsic::riscv_vssseg6_mask:
2668 case Intrinsic::riscv_vssseg7_mask:
2669 case Intrinsic::riscv_vssseg8_mask: {
2670 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2671 /*IsStrided*/ true);
2672 return;
2673 }
2674 case Intrinsic::riscv_vsoxseg2:
2675 case Intrinsic::riscv_vsoxseg3:
2676 case Intrinsic::riscv_vsoxseg4:
2677 case Intrinsic::riscv_vsoxseg5:
2678 case Intrinsic::riscv_vsoxseg6:
2679 case Intrinsic::riscv_vsoxseg7:
2680 case Intrinsic::riscv_vsoxseg8:
2681 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2682 /*IsOrdered*/ true);
2683 return;
2684 case Intrinsic::riscv_vsuxseg2:
2685 case Intrinsic::riscv_vsuxseg3:
2686 case Intrinsic::riscv_vsuxseg4:
2687 case Intrinsic::riscv_vsuxseg5:
2688 case Intrinsic::riscv_vsuxseg6:
2689 case Intrinsic::riscv_vsuxseg7:
2690 case Intrinsic::riscv_vsuxseg8:
2691 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2692 /*IsOrdered*/ false);
2693 return;
2694 case Intrinsic::riscv_vsoxseg2_mask:
2695 case Intrinsic::riscv_vsoxseg3_mask:
2696 case Intrinsic::riscv_vsoxseg4_mask:
2697 case Intrinsic::riscv_vsoxseg5_mask:
2698 case Intrinsic::riscv_vsoxseg6_mask:
2699 case Intrinsic::riscv_vsoxseg7_mask:
2700 case Intrinsic::riscv_vsoxseg8_mask:
2701 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2702 /*IsOrdered*/ true);
2703 return;
2704 case Intrinsic::riscv_vsuxseg2_mask:
2705 case Intrinsic::riscv_vsuxseg3_mask:
2706 case Intrinsic::riscv_vsuxseg4_mask:
2707 case Intrinsic::riscv_vsuxseg5_mask:
2708 case Intrinsic::riscv_vsuxseg6_mask:
2709 case Intrinsic::riscv_vsuxseg7_mask:
2710 case Intrinsic::riscv_vsuxseg8_mask:
2711 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2712 /*IsOrdered*/ false);
2713 return;
2714 case Intrinsic::riscv_vsoxei:
2715 case Intrinsic::riscv_vsoxei_mask:
2716 case Intrinsic::riscv_vsuxei:
2717 case Intrinsic::riscv_vsuxei_mask: {
2718 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2719 IntNo == Intrinsic::riscv_vsuxei_mask;
2720 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2721 IntNo == Intrinsic::riscv_vsoxei_mask;
2722
2723 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2724 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2725
2726 unsigned CurOp = 2;
2727 SmallVector<SDValue, 8> Operands;
2728 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2729
2730 MVT IndexVT;
2731 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2732 /*IsStridedOrIndexed*/ true, Operands,
2733 /*IsLoad=*/false, &IndexVT);
2734
2736 "Element count mismatch");
2737
2740 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2741 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2742 reportFatalUsageError("The V extension does not support EEW=64 for "
2743 "index values when XLEN=32");
2744 }
2745 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2746 IsMasked, IsOrdered, IndexLog2EEW,
2747 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2748 MachineSDNode *Store =
2749 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2750
2751 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2752
2753 ReplaceNode(Node, Store);
2754 return;
2755 }
2756 case Intrinsic::riscv_vsm:
2757 case Intrinsic::riscv_vse:
2758 case Intrinsic::riscv_vse_mask:
2759 case Intrinsic::riscv_vsse:
2760 case Intrinsic::riscv_vsse_mask: {
2761 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2762 IntNo == Intrinsic::riscv_vsse_mask;
2763 bool IsStrided =
2764 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2765
2766 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2767 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2768
2769 unsigned CurOp = 2;
2770 SmallVector<SDValue, 8> Operands;
2771 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2772
2773 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2774 Operands);
2775
2777 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2778 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2779 MachineSDNode *Store =
2780 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2781 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2782
2783 ReplaceNode(Node, Store);
2784 return;
2785 }
2786 case Intrinsic::riscv_sf_vc_x_se:
2787 case Intrinsic::riscv_sf_vc_i_se:
2789 return;
2790 case Intrinsic::riscv_sf_vlte8:
2791 case Intrinsic::riscv_sf_vlte16:
2792 case Intrinsic::riscv_sf_vlte32:
2793 case Intrinsic::riscv_sf_vlte64: {
2794 unsigned Log2SEW;
2795 unsigned PseudoInst;
2796 switch (IntNo) {
2797 case Intrinsic::riscv_sf_vlte8:
2798 PseudoInst = RISCV::PseudoSF_VLTE8;
2799 Log2SEW = 3;
2800 break;
2801 case Intrinsic::riscv_sf_vlte16:
2802 PseudoInst = RISCV::PseudoSF_VLTE16;
2803 Log2SEW = 4;
2804 break;
2805 case Intrinsic::riscv_sf_vlte32:
2806 PseudoInst = RISCV::PseudoSF_VLTE32;
2807 Log2SEW = 5;
2808 break;
2809 case Intrinsic::riscv_sf_vlte64:
2810 PseudoInst = RISCV::PseudoSF_VLTE64;
2811 Log2SEW = 6;
2812 break;
2813 }
2814
2815 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2816 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2817 SDValue Operands[] = {Node->getOperand(2),
2818 Node->getOperand(3),
2819 Node->getOperand(4),
2820 SEWOp,
2821 TWidenOp,
2822 Node->getOperand(0)};
2823
2824 MachineSDNode *TileLoad =
2825 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2826 CurDAG->setNodeMemRefs(TileLoad,
2827 {cast<MemSDNode>(Node)->getMemOperand()});
2828
2829 ReplaceNode(Node, TileLoad);
2830 return;
2831 }
2832 case Intrinsic::riscv_sf_mm_s_s:
2833 case Intrinsic::riscv_sf_mm_s_u:
2834 case Intrinsic::riscv_sf_mm_u_s:
2835 case Intrinsic::riscv_sf_mm_u_u:
2836 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2837 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2838 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2839 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2840 case Intrinsic::riscv_sf_mm_f_f: {
2841 bool HasFRM = false;
2842 unsigned PseudoInst;
2843 switch (IntNo) {
2844 case Intrinsic::riscv_sf_mm_s_s:
2845 PseudoInst = RISCV::PseudoSF_MM_S_S;
2846 break;
2847 case Intrinsic::riscv_sf_mm_s_u:
2848 PseudoInst = RISCV::PseudoSF_MM_S_U;
2849 break;
2850 case Intrinsic::riscv_sf_mm_u_s:
2851 PseudoInst = RISCV::PseudoSF_MM_U_S;
2852 break;
2853 case Intrinsic::riscv_sf_mm_u_u:
2854 PseudoInst = RISCV::PseudoSF_MM_U_U;
2855 break;
2856 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2857 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2858 HasFRM = true;
2859 break;
2860 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2861 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2862 HasFRM = true;
2863 break;
2864 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2865 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2866 HasFRM = true;
2867 break;
2868 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2869 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2870 HasFRM = true;
2871 break;
2872 case Intrinsic::riscv_sf_mm_f_f:
2873 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2874 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2875 else
2876 PseudoInst = RISCV::PseudoSF_MM_F_F;
2877 HasFRM = true;
2878 break;
2879 }
2880 uint64_t TileNum = Node->getConstantOperandVal(2);
2881 SDValue Op1 = Node->getOperand(3);
2882 SDValue Op2 = Node->getOperand(4);
2883 MVT VT = Op1->getSimpleValueType(0);
2884 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2885 SDValue TmOp = Node->getOperand(5);
2886 SDValue TnOp = Node->getOperand(6);
2887 SDValue TkOp = Node->getOperand(7);
2888 SDValue TWidenOp = Node->getOperand(8);
2889 SDValue Chain = Node->getOperand(0);
2890
2891 // sf.mm.f.f with sew=32, twiden=2 is invalid
2892 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2893 TWidenOp->getAsZExtVal() == 2)
2894 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2895
2896 SmallVector<SDValue, 10> Operands(
2897 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2898 if (HasFRM)
2899 Operands.push_back(
2900 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2901 Operands.append({TmOp, TnOp, TkOp,
2902 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2903 Chain});
2904
2905 auto *NewNode =
2906 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2907
2908 ReplaceNode(Node, NewNode);
2909 return;
2910 }
2911 case Intrinsic::riscv_sf_vtzero_t: {
2912 uint64_t TileNum = Node->getConstantOperandVal(2);
2913 SDValue Tm = Node->getOperand(3);
2914 SDValue Tn = Node->getOperand(4);
2915 SDValue Log2SEW = Node->getOperand(5);
2916 SDValue TWiden = Node->getOperand(6);
2917 SDValue Chain = Node->getOperand(0);
2918 auto *NewNode = CurDAG->getMachineNode(
2919 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2920 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2921 TWiden, Chain});
2922
2923 ReplaceNode(Node, NewNode);
2924 return;
2925 }
2926 }
2927 break;
2928 }
2929 case ISD::BITCAST: {
2930 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2931 // Just drop bitcasts between vectors if both are fixed or both are
2932 // scalable.
2933 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2934 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2935 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2936 CurDAG->RemoveDeadNode(Node);
2937 return;
2938 }
2939 if (Subtarget->hasStdExtP()) {
2940 bool Is32BitCast =
2941 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2942 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2943 bool Is64BitCast =
2944 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2945 SrcVT == MVT::v2i32)) ||
2946 (SrcVT == MVT::i64 &&
2947 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2948 if (Is32BitCast || Is64BitCast) {
2949 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2950 CurDAG->RemoveDeadNode(Node);
2951 return;
2952 }
2953 }
2954 break;
2955 }
2956 case ISD::SPLAT_VECTOR: {
2957 if (!Subtarget->hasStdExtP())
2958 break;
2959 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0))) {
2960 bool IsDoubleWide = Subtarget->isPExtPackedDoubleType(VT);
2961
2962 if (ConstNode->isZero()) {
2963 MCPhysReg X0Reg = IsDoubleWide ? RISCV::X0_Pair : RISCV::X0;
2964 SDValue New =
2965 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, X0Reg, VT);
2966 ReplaceNode(Node, New.getNode());
2967 return;
2968 }
2969
2970 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2971 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2972
2973 // Use LI for all ones since it can be compressed to c.li.
2974 if (Val.isAllOnes() && !IsDoubleWide) {
2975 SDNode *NewNode = CurDAG->getMachineNode(
2976 RISCV::ADDI, DL, VT, CurDAG->getRegister(RISCV::X0, VT),
2977 CurDAG->getAllOnesConstant(DL, XLenVT, /*IsTarget=*/true));
2978 ReplaceNode(Node, NewNode);
2979 return;
2980 }
2981
2982 // Find the smallest splat.
2983 if (Val.getBitWidth() > 16 && Val.isSplat(16))
2984 Val = Val.trunc(16);
2985 if (Val.getBitWidth() > 8 && Val.isSplat(8))
2986 Val = Val.trunc(8);
2987
2988 EltSize = Val.getBitWidth();
2989 int64_t Imm = Val.getSExtValue();
2990
2991 unsigned Opc = 0;
2992 if (EltSize == 8) {
2993 Opc = IsDoubleWide ? RISCV::PLI_DB : RISCV::PLI_B;
2994 } else if (EltSize == 16 && isInt<10>(Imm)) {
2995 Opc = IsDoubleWide ? RISCV::PLI_DH : RISCV::PLI_H;
2996 } else if (!IsDoubleWide && EltSize == 32 && isInt<10>(Imm)) {
2997 Opc = RISCV::PLI_W;
2998 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
2999 Opc = IsDoubleWide ? RISCV::PLUI_DH : RISCV::PLUI_H;
3000 Imm = Imm >> 6;
3001 } else if (!IsDoubleWide && EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
3002 Opc = RISCV::PLUI_W;
3003 Imm = Imm >> 22;
3004 }
3005
3006 if (Opc) {
3007 SDNode *NewNode = CurDAG->getMachineNode(
3008 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
3009 ReplaceNode(Node, NewNode);
3010 return;
3011 }
3012 }
3013
3014 break;
3015 }
3017 if (Subtarget->hasStdExtP()) {
3018 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
3019 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
3020 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
3021 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3022 CurDAG->RemoveDeadNode(Node);
3023 return;
3024 }
3025 }
3026 break;
3028 case RISCVISD::TUPLE_INSERT: {
3029 SDValue V = Node->getOperand(0);
3030 SDValue SubV = Node->getOperand(1);
3031 SDLoc DL(SubV);
3032 auto Idx = Node->getConstantOperandVal(2);
3033 MVT SubVecVT = SubV.getSimpleValueType();
3034
3035 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3036 MVT SubVecContainerVT = SubVecVT;
3037 // Establish the correct scalable-vector types for any fixed-length type.
3038 if (SubVecVT.isFixedLengthVector()) {
3039 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
3041 [[maybe_unused]] bool ExactlyVecRegSized =
3042 Subtarget->expandVScale(SubVecVT.getSizeInBits())
3043 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
3044 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3045 .getKnownMinValue()));
3046 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3047 }
3048 MVT ContainerVT = VT;
3049 if (VT.isFixedLengthVector())
3050 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3051
3052 const auto *TRI = Subtarget->getRegisterInfo();
3053 unsigned SubRegIdx;
3054 std::tie(SubRegIdx, Idx) =
3056 ContainerVT, SubVecContainerVT, Idx, TRI);
3057
3058 // If the Idx hasn't been completely eliminated then this is a subvector
3059 // insert which doesn't naturally align to a vector register. These must
3060 // be handled using instructions to manipulate the vector registers.
3061 if (Idx != 0)
3062 break;
3063
3064 RISCVVType::VLMUL SubVecLMUL =
3065 RISCVTargetLowering::getLMUL(SubVecContainerVT);
3066 [[maybe_unused]] bool IsSubVecPartReg =
3067 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3068 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3069 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3070 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3071 V.isUndef()) &&
3072 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3073 "the subvector is smaller than a full-sized register");
3074
3075 // If we haven't set a SubRegIdx, then we must be going between
3076 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3077 if (SubRegIdx == RISCV::NoSubRegister) {
3078 unsigned InRegClassID =
3081 InRegClassID &&
3082 "Unexpected subvector extraction");
3083 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3084 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3085 DL, VT, SubV, RC);
3086 ReplaceNode(Node, NewNode);
3087 return;
3088 }
3089
3090 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3091 ReplaceNode(Node, Insert.getNode());
3092 return;
3093 }
3095 case RISCVISD::TUPLE_EXTRACT: {
3096 SDValue V = Node->getOperand(0);
3097 auto Idx = Node->getConstantOperandVal(1);
3098 MVT InVT = V.getSimpleValueType();
3099
3100 // Handle P-extension extract_subvector for v2i16 from v4i16 and v4i8 from
3101 // v8i8
3102 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() &&
3103 ((InVT == MVT::v4i16 && VT == MVT::v2i16) ||
3104 (InVT == MVT::v8i8 && VT == MVT::v4i8))) {
3105 unsigned NumElts = VT.getVectorNumElements();
3106 if (Idx != 0 && Idx != NumElts)
3107 break;
3108
3109 unsigned SubRegIdx = Idx == 0 ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd;
3110 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3111 ReplaceNode(Node, Extract.getNode());
3112 return;
3113 }
3114
3115 SDLoc DL(V);
3116
3117 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3118 MVT SubVecContainerVT = VT;
3119 // Establish the correct scalable-vector types for any fixed-length type.
3120 if (VT.isFixedLengthVector()) {
3121 assert(Idx == 0);
3122 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3123 }
3124 if (InVT.isFixedLengthVector())
3125 InVT = TLI.getContainerForFixedLengthVector(InVT);
3126
3127 const auto *TRI = Subtarget->getRegisterInfo();
3128 unsigned SubRegIdx;
3129 std::tie(SubRegIdx, Idx) =
3131 InVT, SubVecContainerVT, Idx, TRI);
3132
3133 // If the Idx hasn't been completely eliminated then this is a subvector
3134 // extract which doesn't naturally align to a vector register. These must
3135 // be handled using instructions to manipulate the vector registers.
3136 if (Idx != 0)
3137 break;
3138
3139 // If we haven't set a SubRegIdx, then we must be going between
3140 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3141 if (SubRegIdx == RISCV::NoSubRegister) {
3142 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3144 InRegClassID &&
3145 "Unexpected subvector extraction");
3146 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3147 SDNode *NewNode =
3148 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3149 ReplaceNode(Node, NewNode);
3150 return;
3151 }
3152
3153 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3154 ReplaceNode(Node, Extract.getNode());
3155 return;
3156 }
3157 case RISCVISD::VMV_S_X_VL:
3158 case RISCVISD::VFMV_S_F_VL:
3159 case RISCVISD::VMV_V_X_VL:
3160 case RISCVISD::VFMV_V_F_VL: {
3161 // Try to match splat of a scalar load to a strided load with stride of x0.
3162 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3163 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3164 if (!Node->getOperand(0).isUndef())
3165 break;
3166 SDValue Src = Node->getOperand(1);
3167 auto *Ld = dyn_cast<LoadSDNode>(Src);
3168 // Can't fold load update node because the second
3169 // output is used so that load update node can't be removed.
3170 if (!Ld || Ld->isIndexed())
3171 break;
3172 EVT MemVT = Ld->getMemoryVT();
3173 // The memory VT should be the same size as the element type.
3174 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3175 break;
3176 if (!IsProfitableToFold(Src, Node, Node) ||
3177 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3178 break;
3179
3180 SDValue VL;
3181 if (IsScalarMove) {
3182 // We could deal with more VL if we update the VSETVLI insert pass to
3183 // avoid introducing more VSETVLI.
3184 if (!isOneConstant(Node->getOperand(2)))
3185 break;
3186 selectVLOp(Node->getOperand(2), VL);
3187 } else
3188 selectVLOp(Node->getOperand(2), VL);
3189
3190 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3191 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3192
3193 // If VL=1, then we don't need to do a strided load and can just do a
3194 // regular load.
3195 bool IsStrided = !isOneConstant(VL);
3196
3197 // Only do a strided load if we have optimized zero-stride vector load.
3198 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3199 break;
3200
3201 SmallVector<SDValue> Operands = {
3202 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3203 Ld->getBasePtr()};
3204 if (IsStrided)
3205 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3207 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3208 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3209
3211 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3212 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3213 Log2SEW, static_cast<unsigned>(LMUL));
3214 MachineSDNode *Load =
3215 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3216 // Update the chain.
3217 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3218 // Record the mem-refs
3219 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3220 // Replace the splat with the vlse.
3221 ReplaceNode(Node, Load);
3222 return;
3223 }
3224 case RISCVISD::LPAD_CALL:
3225 case RISCVISD::LPAD_CALL_INDIRECT: {
3226 bool IsIndirect = Opcode == RISCVISD::LPAD_CALL_INDIRECT;
3227 unsigned PseudoOpc = IsIndirect ? RISCV::PseudoCALLIndirectLpadAlign
3228 : RISCV::PseudoCALLLpadAlign;
3229
3230 uint32_t LpadLabel = 0;
3231 if (PreferredLandingPadLabel.getNumOccurrences() > 0) {
3233 report_fatal_error("riscv-landing-pad-label=<val>, <val> needs to fit "
3234 "in unsigned 20-bits");
3235 LpadLabel = PreferredLandingPadLabel;
3236 }
3237
3239 Ops.push_back(Node->getOperand(1));
3240 Ops.push_back(CurDAG->getTargetConstant(LpadLabel, DL, XLenVT));
3241 Ops.push_back(Node->getOperand(0));
3242 if (Node->getGluedNode())
3243 Ops.push_back(Node->getOperand(Node->getNumOperands() - 1));
3244
3246 CurDAG->getMachineNode(PseudoOpc, DL, Node->getVTList(), Ops));
3247 return;
3248 }
3249 case ISD::PREFETCH:
3250 // MIPS's prefetch instruction already encodes the hint within the
3251 // instruction itself, so no extra NTL hint is needed.
3252 if (Subtarget->hasVendorXMIPSCBOP())
3253 break;
3254
3255 unsigned Locality = Node->getConstantOperandVal(3);
3256 if (Locality > 2)
3257 break;
3258
3259 auto *LoadStoreMem = cast<MemSDNode>(Node);
3260 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3262
3263 int NontemporalLevel = 0;
3264 switch (Locality) {
3265 case 0:
3266 NontemporalLevel = 3; // NTL.ALL
3267 break;
3268 case 1:
3269 NontemporalLevel = 1; // NTL.PALL
3270 break;
3271 case 2:
3272 NontemporalLevel = 0; // NTL.P1
3273 break;
3274 default:
3275 llvm_unreachable("unexpected locality value.");
3276 }
3277
3278 if (NontemporalLevel & 0b1)
3280 if (NontemporalLevel & 0b10)
3282 break;
3283 }
3284
3285 // Select the default instruction.
3286 SelectCode(Node);
3287}
3288
3290 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3291 std::vector<SDValue> &OutOps) {
3292 // Always produce a register and immediate operand, as expected by
3293 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3294 switch (ConstraintID) {
3297 SDValue Op0, Op1;
3298 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3299 assert(Found && "SelectAddrRegImm should always succeed");
3300 OutOps.push_back(Op0);
3301 OutOps.push_back(Op1);
3302 return false;
3303 }
3305 OutOps.push_back(Op);
3306 OutOps.push_back(
3307 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3308 return false;
3309 default:
3310 report_fatal_error("Unexpected asm memory constraint " +
3311 InlineAsm::getMemConstraintName(ConstraintID));
3312 }
3313
3314 return true;
3315}
3316
3318 SDValue &Offset) {
3319 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3320 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3321 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3322 return true;
3323 }
3324
3325 return false;
3326}
3327
3328// Fold constant addresses.
3329static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3330 const MVT VT, const RISCVSubtarget *Subtarget,
3332 bool IsPrefetch = false) {
3333 if (!isa<ConstantSDNode>(Addr))
3334 return false;
3335
3336 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3337
3338 // If the constant is a simm12, we can fold the whole constant and use X0 as
3339 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3340 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3341 int64_t Lo12 = SignExtend64<12>(CVal);
3342 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3343 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3344 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3345 return false;
3346 if (Hi) {
3347 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3348 Base = SDValue(
3349 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3350 CurDAG->getTargetConstant(Hi20, DL, VT)),
3351 0);
3352 } else {
3353 Base = CurDAG->getRegister(RISCV::X0, VT);
3354 }
3355 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3356 return true;
3357 }
3358
3359 // Ask how constant materialization would handle this constant.
3360 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3361
3362 // If the last instruction would be an ADDI, we can fold its immediate and
3363 // emit the rest of the sequence as the base.
3364 if (Seq.back().getOpcode() != RISCV::ADDI)
3365 return false;
3366 Lo12 = Seq.back().getImm();
3367 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3368 return false;
3369
3370 // Drop the last instruction.
3371 Seq.pop_back();
3372 assert(!Seq.empty() && "Expected more instructions in sequence");
3373
3374 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3375 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3376 return true;
3377}
3378
3379// Is this ADD instruction only used as the base pointer of scalar loads and
3380// stores?
3382 for (auto *User : Add->users()) {
3383 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3384 User->getOpcode() != RISCVISD::LD_RV32 &&
3385 User->getOpcode() != RISCVISD::SD_RV32 &&
3386 User->getOpcode() != ISD::ATOMIC_LOAD &&
3387 User->getOpcode() != ISD::ATOMIC_STORE)
3388 return false;
3389 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3390 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3391 VT != MVT::f64)
3392 return false;
3393 // Don't allow stores of the value. It must be used as the address.
3394 if (User->getOpcode() == ISD::STORE &&
3395 cast<StoreSDNode>(User)->getValue() == Add)
3396 return false;
3397 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3398 cast<AtomicSDNode>(User)->getVal() == Add)
3399 return false;
3400 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3401 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3402 return false;
3403 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3404 return false;
3405 }
3406
3407 return true;
3408}
3409
3411 switch (User->getOpcode()) {
3412 default:
3413 return false;
3414 case ISD::LOAD:
3415 case RISCVISD::LD_RV32:
3416 case ISD::ATOMIC_LOAD:
3417 break;
3418 case ISD::STORE:
3419 // Don't allow stores of Add. It must only be used as the address.
3421 return false;
3422 break;
3423 case RISCVISD::SD_RV32:
3424 // Don't allow stores of Add. It must only be used as the address.
3425 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3426 return false;
3427 break;
3428 case ISD::ATOMIC_STORE:
3429 // Don't allow stores of Add. It must only be used as the address.
3430 if (cast<AtomicSDNode>(User)->getVal() == Add)
3431 return false;
3432 break;
3433 }
3434
3435 return true;
3436}
3437
3438// To prevent SelectAddrRegImm from folding offsets that conflict with the
3439// fusion of PseudoMovAddr, check if the offset of every use of a given address
3440// is within the alignment.
3442 Align Alignment) {
3443 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3444 for (auto *User : Addr->users()) {
3445 // If the user is a load or store, then the offset is 0 which is always
3446 // within alignment.
3447 if (isRegImmLoadOrStore(User, Addr))
3448 continue;
3449
3450 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3451 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3452 if (!isInt<12>(CVal) || Alignment <= CVal)
3453 return false;
3454
3455 // Make sure all uses are foldable load/stores.
3456 for (auto *AddUser : User->users())
3457 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3458 return false;
3459
3460 continue;
3461 }
3462
3463 return false;
3464 }
3465
3466 return true;
3467}
3468
3470 SDValue &Offset) {
3471 if (SelectAddrFrameIndex(Addr, Base, Offset))
3472 return true;
3473
3474 SDLoc DL(Addr);
3475 MVT VT = Addr.getSimpleValueType();
3476
3477 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3478 bool CanFold = true;
3479 // Unconditionally fold if operand 1 is not a global address (e.g.
3480 // externsymbol)
3481 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3482 const DataLayout &DL = CurDAG->getDataLayout();
3483 Align Alignment = commonAlignment(
3484 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3485 if (!areOffsetsWithinAlignment(Addr, Alignment))
3486 CanFold = false;
3487 }
3488 if (CanFold) {
3489 Base = Addr.getOperand(0);
3490 Offset = Addr.getOperand(1);
3491 return true;
3492 }
3493 }
3494
3495 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3496 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3497 if (isInt<12>(CVal)) {
3498 Base = Addr.getOperand(0);
3499 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3500 SDValue LoOperand = Base.getOperand(1);
3501 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3502 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3503 // (its low part, really), then we can rely on the alignment of that
3504 // variable to provide a margin of safety before low part can overflow
3505 // the 12 bits of the load/store offset. Check if CVal falls within
3506 // that margin; if so (low part + CVal) can't overflow.
3507 const DataLayout &DL = CurDAG->getDataLayout();
3508 Align Alignment = commonAlignment(
3509 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3510 if ((CVal == 0 || Alignment > CVal) &&
3511 areOffsetsWithinAlignment(Base, Alignment)) {
3512 int64_t CombinedOffset = CVal + GA->getOffset();
3513 Base = Base.getOperand(0);
3514 Offset = CurDAG->getTargetGlobalAddress(
3515 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3516 CombinedOffset, GA->getTargetFlags());
3517 return true;
3518 }
3519 }
3520 }
3521
3522 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3523 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3524 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3525 return true;
3526 }
3527 }
3528
3529 // Handle ADD with large immediates.
3530 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3531 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3532 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3533
3534 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3535 // an ADDI for part of the offset and fold the rest into the load/store.
3536 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3537 if (CVal >= -4096 && CVal <= 4094) {
3538 int64_t Adj = CVal < 0 ? -2048 : 2047;
3539 Base = SDValue(
3540 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3541 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3542 0);
3543 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3544 return true;
3545 }
3546
3547 // For larger immediates, we might be able to save one instruction from
3548 // constant materialization by folding the Lo12 bits of the immediate into
3549 // the address. We should only do this if the ADD is only used by loads and
3550 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3551 // separately with the full materialized immediate creating extra
3552 // instructions.
3553 if (isWorthFoldingAdd(Addr) &&
3554 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3555 Offset, /*IsPrefetch=*/false)) {
3556 // Insert an ADD instruction with the materialized Hi52 bits.
3557 Base = SDValue(
3558 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3559 0);
3560 return true;
3561 }
3562 }
3563
3564 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3565 /*IsPrefetch=*/false))
3566 return true;
3567
3568 Base = Addr;
3569 Offset = CurDAG->getTargetConstant(0, DL, VT);
3570 return true;
3571}
3572
3573/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3575 SDValue &Offset) {
3576 if (SelectAddrFrameIndex(Addr, Base, Offset))
3577 return true;
3578
3579 SDLoc DL(Addr);
3580 MVT VT = Addr.getSimpleValueType();
3581
3582 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3583 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3584 if (isUInt<9>(CVal)) {
3585 Base = Addr.getOperand(0);
3586
3587 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3588 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3589 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3590 return true;
3591 }
3592 }
3593
3594 Base = Addr;
3595 Offset = CurDAG->getTargetConstant(0, DL, VT);
3596 return true;
3597}
3598
3599/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3600/// Offset should be all zeros.
3602 SDValue &Offset) {
3603 if (SelectAddrFrameIndex(Addr, Base, Offset))
3604 return true;
3605
3606 SDLoc DL(Addr);
3607 MVT VT = Addr.getSimpleValueType();
3608
3609 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3610 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3611 if (isInt<12>(CVal)) {
3612 Base = Addr.getOperand(0);
3613
3614 // Early-out if not a valid offset.
3615 if ((CVal & 0b11111) != 0) {
3616 Base = Addr;
3617 Offset = CurDAG->getTargetConstant(0, DL, VT);
3618 return true;
3619 }
3620
3621 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3622 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3623 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3624 return true;
3625 }
3626 }
3627
3628 // Handle ADD with large immediates.
3629 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3630 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3631 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3632
3633 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3634 // one instruction by folding adjustment (-2048 or 2016) into the address.
3635 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3636 int64_t Adj = CVal < 0 ? -2048 : 2016;
3637 int64_t AdjustedOffset = CVal - Adj;
3638 Base =
3639 SDValue(CurDAG->getMachineNode(
3640 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3641 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3642 0);
3643 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3644 return true;
3645 }
3646
3647 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3648 Offset, /*IsPrefetch=*/true)) {
3649 // Insert an ADD instruction with the materialized Hi52 bits.
3650 Base = SDValue(
3651 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3652 0);
3653 return true;
3654 }
3655 }
3656
3657 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3658 /*IsPrefetch=*/true))
3659 return true;
3660
3661 Base = Addr;
3662 Offset = CurDAG->getTargetConstant(0, DL, VT);
3663 return true;
3664}
3665
3666/// Return true if this a load/store that we have a RegRegScale instruction for.
3668 const RISCVSubtarget &Subtarget) {
3669 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3670 return false;
3671 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3672 if (!(VT.isScalarInteger() &&
3673 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3674 !((VT == MVT::f32 || VT == MVT::f64) &&
3675 Subtarget.hasVendorXTHeadFMemIdx()))
3676 return false;
3677 // Don't allow stores of the value. It must be used as the address.
3678 if (User->getOpcode() == ISD::STORE &&
3679 cast<StoreSDNode>(User)->getValue() == Add)
3680 return false;
3681
3682 return true;
3683}
3684
3685/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3686/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3687/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3688/// single addi and we don't have a SHXADD instruction we could use.
3689/// FIXME: May still need to check how many and what kind of users the SHL has.
3691 SDValue Add,
3692 SDValue Shift = SDValue()) {
3693 bool FoundADDI = false;
3694 for (auto *User : Add->users()) {
3695 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3696 continue;
3697
3698 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3699 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3701 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3702 return false;
3703
3704 FoundADDI = true;
3705
3706 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3707 assert(Shift.getOpcode() == ISD::SHL);
3708 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3709 if (Subtarget.hasShlAdd(ShiftAmt))
3710 return false;
3711
3712 // All users of the ADDI should be load/store.
3713 for (auto *ADDIUser : User->users())
3714 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3715 return false;
3716 }
3717
3718 return true;
3719}
3720
3722 unsigned MaxShiftAmount,
3723 SDValue &Base, SDValue &Index,
3724 SDValue &Scale) {
3725 if (Addr.getOpcode() != ISD::ADD)
3726 return false;
3727 SDValue LHS = Addr.getOperand(0);
3728 SDValue RHS = Addr.getOperand(1);
3729
3730 EVT VT = Addr.getSimpleValueType();
3731 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3732 SDValue &Shift) {
3733 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3734 return false;
3735
3736 // Only match shifts by a value in range [0, MaxShiftAmount].
3737 unsigned ShiftAmt = N.getConstantOperandVal(1);
3738 if (ShiftAmt > MaxShiftAmount)
3739 return false;
3740
3741 Index = N.getOperand(0);
3742 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3743 return true;
3744 };
3745
3746 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3747 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3748 if (LHS.getOpcode() == ISD::ADD &&
3749 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3750 isInt<12>(C1->getSExtValue())) {
3751 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3752 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3753 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3754 SDLoc(Addr), VT);
3755 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3756 LHS.getOperand(0), C1Val),
3757 0);
3758 return true;
3759 }
3760
3761 // Add is commutative so we need to check both operands.
3762 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3763 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3764 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3765 SDLoc(Addr), VT);
3766 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3767 LHS.getOperand(1), C1Val),
3768 0);
3769 return true;
3770 }
3771 }
3772
3773 // Don't match add with constants.
3774 // FIXME: Is this profitable for large constants that have 0s in the lower
3775 // 12 bits that we can materialize with LUI?
3776 return false;
3777 }
3778
3779 // Try to match a shift on the RHS.
3780 if (SelectShl(RHS, Index, Scale)) {
3781 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3782 return false;
3783 Base = LHS;
3784 return true;
3785 }
3786
3787 // Try to match a shift on the LHS.
3788 if (SelectShl(LHS, Index, Scale)) {
3789 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3790 return false;
3791 Base = RHS;
3792 return true;
3793 }
3794
3795 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3796 return false;
3797
3798 Base = LHS;
3799 Index = RHS;
3800 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3801 return true;
3802}
3803
3805 unsigned MaxShiftAmount,
3806 unsigned Bits, SDValue &Base,
3807 SDValue &Index,
3808 SDValue &Scale) {
3809 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3810 return false;
3811
3812 if (Index.getOpcode() == ISD::AND) {
3813 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3814 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3815 Index = Index.getOperand(0);
3816 return true;
3817 }
3818 }
3819
3820 return false;
3821}
3822
3824 SDValue &Offset) {
3825 if (Addr.getOpcode() != ISD::ADD)
3826 return false;
3827
3828 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3829 return false;
3830
3831 Base = Addr.getOperand(0);
3832 Offset = Addr.getOperand(1);
3833 return true;
3834}
3835
3837 SDValue &ShAmt) {
3838 ShAmt = N;
3839
3840 // Peek through zext.
3841 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3842 ShAmt = ShAmt.getOperand(0);
3843
3844 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3845 // amount. If there is an AND on the shift amount, we can bypass it if it
3846 // doesn't affect any of those bits.
3847 if (ShAmt.getOpcode() == ISD::AND &&
3848 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3849 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3850
3851 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3852 // mask that covers the bits needed to represent all shift amounts.
3853 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3854 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3855
3856 if (ShMask.isSubsetOf(AndMask)) {
3857 ShAmt = ShAmt.getOperand(0);
3858 } else {
3859 // SimplifyDemandedBits may have optimized the mask so try restoring any
3860 // bits that are known zero.
3861 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3862 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3863 return true;
3864 ShAmt = ShAmt.getOperand(0);
3865 }
3866 }
3867
3868 if (ShAmt.getOpcode() == ISD::ADD &&
3869 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3870 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3871 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3872 // to avoid the ADD.
3873 if (Imm != 0 && Imm % ShiftWidth == 0) {
3874 ShAmt = ShAmt.getOperand(0);
3875 return true;
3876 }
3877 } else if (ShAmt.getOpcode() == ISD::SUB &&
3878 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3879 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3880 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3881 // generate a NEG instead of a SUB of a constant.
3882 if (Imm != 0 && Imm % ShiftWidth == 0) {
3883 SDLoc DL(ShAmt);
3884 EVT VT = ShAmt.getValueType();
3885 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3886 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3887 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3888 ShAmt.getOperand(1));
3889 ShAmt = SDValue(Neg, 0);
3890 return true;
3891 }
3892 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3893 // to generate a NOT instead of a SUB of a constant.
3894 if (Imm % ShiftWidth == ShiftWidth - 1) {
3895 SDLoc DL(ShAmt);
3896 EVT VT = ShAmt.getValueType();
3897 MachineSDNode *Not = CurDAG->getMachineNode(
3898 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3899 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3900 ShAmt = SDValue(Not, 0);
3901 return true;
3902 }
3903 }
3904
3905 return true;
3906}
3907
3908/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3909/// check for equality with 0. This function emits instructions that convert the
3910/// seteq/setne into something that can be compared with 0.
3911/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3912/// ISD::SETNE).
3914 SDValue &Val) {
3915 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3916 "Unexpected condition code!");
3917
3918 // We're looking for a setcc.
3919 if (N->getOpcode() != ISD::SETCC)
3920 return false;
3921
3922 // Must be an equality comparison.
3923 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3924 if (CCVal != ExpectedCCVal)
3925 return false;
3926
3927 SDValue LHS = N->getOperand(0);
3928 SDValue RHS = N->getOperand(1);
3929
3930 if (!LHS.getValueType().isScalarInteger())
3931 return false;
3932
3933 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3934 if (isNullConstant(RHS)) {
3935 Val = LHS;
3936 return true;
3937 }
3938
3939 SDLoc DL(N);
3940
3941 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3942 int64_t CVal = C->getSExtValue();
3943 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3944 // non-zero otherwise.
3945 if (CVal == -2048) {
3946 Val = SDValue(
3947 CurDAG->getMachineNode(
3948 RISCV::XORI, DL, N->getValueType(0), LHS,
3949 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3950 0);
3951 return true;
3952 }
3953 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3954 // if the LHS is equal to the RHS and non-zero otherwise.
3955 if (isInt<12>(CVal) || CVal == 2048) {
3956 unsigned Opc = RISCV::ADDI;
3957 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3958 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3959 Opc = RISCV::ADDIW;
3960 LHS = LHS.getOperand(0);
3961 }
3962
3963 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3964 CurDAG->getSignedTargetConstant(
3965 -CVal, DL, N->getValueType(0))),
3966 0);
3967 return true;
3968 }
3969 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3970 Val = SDValue(
3971 CurDAG->getMachineNode(
3972 RISCV::BINVI, DL, N->getValueType(0), LHS,
3973 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3974 0);
3975 return true;
3976 }
3977 // Same as the addi case above but for larger immediates (signed 26-bit) use
3978 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3979 // anything which can be done with a single lui as it might be compressible.
3980 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3981 (CVal & 0xFFF) != 0) {
3982 Val = SDValue(
3983 CurDAG->getMachineNode(
3984 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3985 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3986 0);
3987 return true;
3988 }
3989 }
3990
3991 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3992 // equal and a non-zero value if they aren't.
3993 Val = SDValue(
3994 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3995 return true;
3996}
3997
3999 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4000 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
4001 Val = N.getOperand(0);
4002 return true;
4003 }
4004
4005 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
4006 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
4007 return N;
4008
4009 SDValue N0 = N.getOperand(0);
4010 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4011 N.getConstantOperandVal(1) == ShiftAmt &&
4012 N0.getConstantOperandVal(1) == ShiftAmt)
4013 return N0.getOperand(0);
4014
4015 return N;
4016 };
4017
4018 MVT VT = N.getSimpleValueType();
4019 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
4020 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
4021 return true;
4022 }
4023
4024 return false;
4025}
4026
4028 if (N.getOpcode() == ISD::AND) {
4029 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4030 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
4031 Val = N.getOperand(0);
4032 return true;
4033 }
4034 }
4035 MVT VT = N.getSimpleValueType();
4036 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
4037 if (CurDAG->MaskedValueIsZero(N, Mask)) {
4038 Val = N;
4039 return true;
4040 }
4041
4042 return false;
4043}
4044
4045/// Look for various patterns that can be done with a SHL that can be folded
4046/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
4047/// SHXADD we are trying to match.
4049 SDValue &Val) {
4050 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
4051 SDValue N0 = N.getOperand(0);
4052
4053 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
4054 (LeftShift || N0.getOpcode() == ISD::SRL) &&
4056 uint64_t Mask = N.getConstantOperandVal(1);
4057 unsigned C2 = N0.getConstantOperandVal(1);
4058
4059 unsigned XLen = Subtarget->getXLen();
4060 if (LeftShift)
4061 Mask &= maskTrailingZeros<uint64_t>(C2);
4062 else
4063 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
4064
4065 if (isShiftedMask_64(Mask)) {
4066 unsigned Leading = XLen - llvm::bit_width(Mask);
4067 unsigned Trailing = llvm::countr_zero(Mask);
4068 if (Trailing != ShAmt)
4069 return false;
4070
4071 unsigned Opcode;
4072 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
4073 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
4074 // followed by a SHXADD with c3 for the X amount.
4075 if (LeftShift && Leading == 0 && C2 < Trailing)
4076 Opcode = RISCV::SRLI;
4077 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
4078 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
4079 // followed by a SHXADD with c3 for the X amount.
4080 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
4081 Opcode = RISCV::SRLIW;
4082 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
4083 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
4084 // followed by a SHXADD using c3 for the X amount.
4085 else if (!LeftShift && Leading == C2)
4086 Opcode = RISCV::SRLI;
4087 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
4088 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
4089 // followed by a SHXADD using c3 for the X amount.
4090 else if (!LeftShift && Leading == 32 + C2)
4091 Opcode = RISCV::SRLIW;
4092 else
4093 return false;
4094
4095 SDLoc DL(N);
4096 EVT VT = N.getValueType();
4097 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
4098 Val = SDValue(
4099 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
4100 CurDAG->getTargetConstant(ShAmt, DL, VT)),
4101 0);
4102 return true;
4103 }
4104 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
4106 uint64_t Mask = N.getConstantOperandVal(1);
4107 unsigned C2 = N0.getConstantOperandVal(1);
4108
4109 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4110 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4111 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4112 // the X amount.
4113 if (isShiftedMask_64(Mask)) {
4114 unsigned XLen = Subtarget->getXLen();
4115 unsigned Leading = XLen - llvm::bit_width(Mask);
4116 unsigned Trailing = llvm::countr_zero(Mask);
4117 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4118 SDLoc DL(N);
4119 EVT VT = N.getValueType();
4120 Val = SDValue(CurDAG->getMachineNode(
4121 RISCV::SRAI, DL, VT, N0.getOperand(0),
4122 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
4123 0);
4124 Val = SDValue(CurDAG->getMachineNode(
4125 RISCV::SRLI, DL, VT, Val,
4126 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
4127 0);
4128 return true;
4129 }
4130 }
4131 }
4132 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4133 (LeftShift || N.getOpcode() == ISD::SRL) &&
4134 isa<ConstantSDNode>(N.getOperand(1))) {
4135 SDValue N0 = N.getOperand(0);
4136 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4138 uint64_t Mask = N0.getConstantOperandVal(1);
4139 if (isShiftedMask_64(Mask)) {
4140 unsigned C1 = N.getConstantOperandVal(1);
4141 unsigned XLen = Subtarget->getXLen();
4142 unsigned Leading = XLen - llvm::bit_width(Mask);
4143 unsigned Trailing = llvm::countr_zero(Mask);
4144 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4145 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4146 if (LeftShift && Leading == 32 && Trailing > 0 &&
4147 (Trailing + C1) == ShAmt) {
4148 SDLoc DL(N);
4149 EVT VT = N.getValueType();
4150 Val = SDValue(CurDAG->getMachineNode(
4151 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4152 CurDAG->getTargetConstant(Trailing, DL, VT)),
4153 0);
4154 return true;
4155 }
4156 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4157 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4158 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4159 (Trailing - C1) == ShAmt) {
4160 SDLoc DL(N);
4161 EVT VT = N.getValueType();
4162 Val = SDValue(CurDAG->getMachineNode(
4163 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4164 CurDAG->getTargetConstant(Trailing, DL, VT)),
4165 0);
4166 return true;
4167 }
4168 }
4169 }
4170 }
4171
4172 return false;
4173}
4174
4175/// Look for various patterns that can be done with a SHL that can be folded
4176/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4177/// SHXADD_UW we are trying to match.
4179 SDValue &Val) {
4180 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
4181 N.hasOneUse()) {
4182 SDValue N0 = N.getOperand(0);
4183 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4184 N0.hasOneUse()) {
4185 uint64_t Mask = N.getConstantOperandVal(1);
4186 unsigned C2 = N0.getConstantOperandVal(1);
4187
4188 Mask &= maskTrailingZeros<uint64_t>(C2);
4189
4190 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4191 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4192 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4193 if (isShiftedMask_64(Mask)) {
4194 unsigned Leading = llvm::countl_zero(Mask);
4195 unsigned Trailing = llvm::countr_zero(Mask);
4196 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4197 SDLoc DL(N);
4198 EVT VT = N.getValueType();
4199 Val = SDValue(CurDAG->getMachineNode(
4200 RISCV::SLLI, DL, VT, N0.getOperand(0),
4201 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
4202 0);
4203 return true;
4204 }
4205 }
4206 }
4207 }
4208
4209 return false;
4210}
4211
4213 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4214 if (N->getFlags().hasDisjoint())
4215 return true;
4216 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
4217}
4218
4219bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4220 SDValue N, SDValue &Val) {
4221 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
4222 /*CompressionCost=*/true);
4223 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
4224 /*CompressionCost=*/true);
4225 if (OrigCost <= Cost)
4226 return false;
4227
4228 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4229 return true;
4230}
4231
4233 if (!isa<ConstantSDNode>(N))
4234 return false;
4235 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4236 if ((Imm >> 31) != 1)
4237 return false;
4238
4239 for (const SDNode *U : N->users()) {
4240 switch (U->getOpcode()) {
4241 case ISD::ADD:
4242 break;
4243 case ISD::OR:
4244 if (orDisjoint(U))
4245 break;
4246 return false;
4247 default:
4248 return false;
4249 }
4250 }
4251
4252 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
4253}
4254
4256 if (!isa<ConstantSDNode>(N))
4257 return false;
4258 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4259 if (isInt<32>(Imm))
4260 return false;
4261 if (Imm == INT64_MIN)
4262 return false;
4263
4264 for (const SDNode *U : N->users()) {
4265 switch (U->getOpcode()) {
4266 case ISD::ADD:
4267 break;
4268 case RISCVISD::VMV_V_X_VL:
4269 if (!all_of(U->users(), [](const SDNode *V) {
4270 return V->getOpcode() == ISD::ADD ||
4271 V->getOpcode() == RISCVISD::ADD_VL;
4272 }))
4273 return false;
4274 break;
4275 default:
4276 return false;
4277 }
4278 }
4279
4280 return selectImm64IfCheaper(-Imm, Imm, N, Val);
4281}
4282
4284 if (!isa<ConstantSDNode>(N))
4285 return false;
4286 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4287
4288 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
4289 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4290 return false;
4291
4292 // Abandon this transform if the constant is needed elsewhere.
4293 for (const SDNode *U : N->users()) {
4294 switch (U->getOpcode()) {
4295 case ISD::AND:
4296 case ISD::OR:
4297 case ISD::XOR:
4298 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4299 return false;
4300 break;
4301 case RISCVISD::VMV_V_X_VL:
4302 if (!Subtarget->hasStdExtZvkb())
4303 return false;
4304 if (!all_of(U->users(), [](const SDNode *V) {
4305 return V->getOpcode() == ISD::AND ||
4306 V->getOpcode() == RISCVISD::AND_VL;
4307 }))
4308 return false;
4309 break;
4310 default:
4311 return false;
4312 }
4313 }
4314
4315 if (isInt<32>(Imm)) {
4316 Val =
4317 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
4318 return true;
4319 }
4320
4321 // For 64-bit constants, the instruction sequences get complex,
4322 // so we select inverted only if it's cheaper.
4323 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4324}
4325
4326static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4327 unsigned Bits,
4328 const TargetInstrInfo *TII) {
4329 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4330
4331 if (!MCOpcode)
4332 return false;
4333
4334 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4335 const uint64_t TSFlags = MCID.TSFlags;
4336 if (!RISCVII::hasSEWOp(TSFlags))
4337 return false;
4338 assert(RISCVII::hasVLOp(TSFlags));
4339
4340 unsigned ChainOpIdx = User->getNumOperands() - 1;
4341 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4342 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4343 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4344 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4345
4346 if (UserOpNo == VLIdx)
4347 return false;
4348
4349 auto NumDemandedBits =
4350 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4351 return NumDemandedBits && Bits >= *NumDemandedBits;
4352}
4353
4354// Return true if all users of this SDNode* only consume the lower \p Bits.
4355// This can be used to form W instructions for add/sub/mul/shl even when the
4356// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4357// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4358// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4359// the add/sub/mul/shl to become non-W instructions. By checking the users we
4360// may be able to use a W instruction and CSE with the other instruction if
4361// this has happened. We could try to detect that the CSE opportunity exists
4362// before doing this, but that would be more complicated.
4364 const unsigned Depth) const {
4365 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4366 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4367 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4368 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4369 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4370 isa<ConstantSDNode>(Node) || Depth != 0) &&
4371 "Unexpected opcode");
4372
4374 return false;
4375
4376 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4377 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4378 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4379 return false;
4380
4381 for (SDUse &Use : Node->uses()) {
4382 SDNode *User = Use.getUser();
4383 // Users of this node should have already been instruction selected
4384 if (!User->isMachineOpcode())
4385 return false;
4386
4387 // TODO: Add more opcodes?
4388 switch (User->getMachineOpcode()) {
4389 default:
4391 break;
4392 return false;
4393 case RISCV::ADDW:
4394 case RISCV::ADDIW:
4395 case RISCV::SUBW:
4396 case RISCV::MULW:
4397 case RISCV::SLLW:
4398 case RISCV::SLLIW:
4399 case RISCV::SRAW:
4400 case RISCV::SRAIW:
4401 case RISCV::SRLW:
4402 case RISCV::SRLIW:
4403 case RISCV::DIVW:
4404 case RISCV::DIVUW:
4405 case RISCV::REMW:
4406 case RISCV::REMUW:
4407 case RISCV::ROLW:
4408 case RISCV::RORW:
4409 case RISCV::RORIW:
4410 case RISCV::CLSW:
4411 case RISCV::CLZW:
4412 case RISCV::CTZW:
4413 case RISCV::CPOPW:
4414 case RISCV::SLLI_UW:
4415 case RISCV::ABSW:
4416 case RISCV::FMV_W_X:
4417 case RISCV::FCVT_H_W:
4418 case RISCV::FCVT_H_W_INX:
4419 case RISCV::FCVT_H_WU:
4420 case RISCV::FCVT_H_WU_INX:
4421 case RISCV::FCVT_S_W:
4422 case RISCV::FCVT_S_W_INX:
4423 case RISCV::FCVT_S_WU:
4424 case RISCV::FCVT_S_WU_INX:
4425 case RISCV::FCVT_D_W:
4426 case RISCV::FCVT_D_W_INX:
4427 case RISCV::FCVT_D_WU:
4428 case RISCV::FCVT_D_WU_INX:
4429 case RISCV::TH_REVW:
4430 case RISCV::TH_SRRIW:
4431 if (Bits >= 32)
4432 break;
4433 return false;
4434 case RISCV::SLL:
4435 case RISCV::SRA:
4436 case RISCV::SRL:
4437 case RISCV::ROL:
4438 case RISCV::ROR:
4439 case RISCV::BSET:
4440 case RISCV::BCLR:
4441 case RISCV::BINV:
4442 // Shift amount operands only use log2(Xlen) bits.
4443 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4444 break;
4445 return false;
4446 case RISCV::SLLI:
4447 // SLLI only uses the lower (XLen - ShAmt) bits.
4448 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4449 break;
4450 return false;
4451 case RISCV::ANDI:
4452 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4453 break;
4454 goto RecCheck;
4455 case RISCV::ORI: {
4456 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4457 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4458 break;
4459 [[fallthrough]];
4460 }
4461 case RISCV::AND:
4462 case RISCV::OR:
4463 case RISCV::XOR:
4464 case RISCV::XORI:
4465 case RISCV::ANDN:
4466 case RISCV::ORN:
4467 case RISCV::XNOR:
4468 case RISCV::SH1ADD:
4469 case RISCV::SH2ADD:
4470 case RISCV::SH3ADD:
4471 RecCheck:
4472 if (hasAllNBitUsers(User, Bits, Depth + 1))
4473 break;
4474 return false;
4475 case RISCV::SRLI: {
4476 unsigned ShAmt = User->getConstantOperandVal(1);
4477 // If we are shifting right by less than Bits, and users don't demand any
4478 // bits that were shifted into [Bits-1:0], then we can consider this as an
4479 // N-Bit user.
4480 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4481 break;
4482 return false;
4483 }
4484 case RISCV::SEXT_B:
4485 case RISCV::PACKH:
4486 if (Bits >= 8)
4487 break;
4488 return false;
4489 case RISCV::SEXT_H:
4490 case RISCV::FMV_H_X:
4491 case RISCV::ZEXT_H_RV32:
4492 case RISCV::ZEXT_H_RV64:
4493 case RISCV::PACKW:
4494 if (Bits >= 16)
4495 break;
4496 return false;
4497 case RISCV::PACK:
4498 if (Bits >= (Subtarget->getXLen() / 2))
4499 break;
4500 return false;
4501 case RISCV::PPAIRE_H:
4502 // If only the lower 32-bits of the result are used, then only the
4503 // lower 16 bits of the inputs are used.
4504 if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
4505 break;
4506 return false;
4507 case RISCV::ADD_UW:
4508 case RISCV::SH1ADD_UW:
4509 case RISCV::SH2ADD_UW:
4510 case RISCV::SH3ADD_UW:
4511 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4512 // 32 bits.
4513 if (Use.getOperandNo() == 0 && Bits >= 32)
4514 break;
4515 return false;
4516 case RISCV::SB:
4517 if (Use.getOperandNo() == 0 && Bits >= 8)
4518 break;
4519 return false;
4520 case RISCV::SH:
4521 if (Use.getOperandNo() == 0 && Bits >= 16)
4522 break;
4523 return false;
4524 case RISCV::SW:
4525 if (Use.getOperandNo() == 0 && Bits >= 32)
4526 break;
4527 return false;
4528 case RISCV::TH_EXT:
4529 case RISCV::TH_EXTU: {
4530 unsigned Msb = User->getConstantOperandVal(1);
4531 unsigned Lsb = User->getConstantOperandVal(2);
4532 // Behavior of Msb < Lsb is not well documented.
4533 if (Msb >= Lsb && Bits > Msb)
4534 break;
4535 return false;
4536 }
4537 }
4538 }
4539
4540 return true;
4541}
4542
4543// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4545 SDValue &Shl2) {
4546 auto *C = dyn_cast<ConstantSDNode>(N);
4547 if (!C)
4548 return false;
4549
4550 int64_t Offset = C->getSExtValue();
4551 for (unsigned Shift = 0; Shift < 4; Shift++) {
4552 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4553 EVT VT = N->getValueType(0);
4554 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4555 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4556 return true;
4557 }
4558 }
4559
4560 return false;
4561}
4562
4563// Select VL as a 5 bit immediate or a value that will become a register. This
4564// allows us to choose between VSETIVLI or VSETVLI later.
4566 auto *C = dyn_cast<ConstantSDNode>(N);
4567 if (C && isUInt<5>(C->getZExtValue())) {
4568 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4569 N->getValueType(0));
4570 } else if (C && C->isAllOnes()) {
4571 // Treat all ones as VLMax.
4572 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4573 N->getValueType(0));
4574 } else if (isa<RegisterSDNode>(N) &&
4575 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4576 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4577 // as the register class. Convert X0 to a special immediate to pass the
4578 // MachineVerifier. This is recognized specially by the vsetvli insertion
4579 // pass.
4580 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4581 N->getValueType(0));
4582 } else {
4583 VL = N;
4584 }
4585
4586 return true;
4587}
4588
4590 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4591 if (!N.getOperand(0).isUndef())
4592 return SDValue();
4593 N = N.getOperand(1);
4594 }
4595 SDValue Splat = N;
4596 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4597 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4598 !Splat.getOperand(0).isUndef())
4599 return SDValue();
4600 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4601 return Splat;
4602}
4603
4606 if (!Splat)
4607 return false;
4608
4609 SplatVal = Splat.getOperand(1);
4610 return true;
4611}
4612
4614 SelectionDAG &DAG,
4615 const RISCVSubtarget &Subtarget,
4616 std::function<bool(int64_t)> ValidateImm,
4617 bool Decrement = false) {
4619 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4620 return false;
4621
4622 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4623 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4624 "Unexpected splat operand type");
4625
4626 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4627 // type is wider than the resulting vector element type: an implicit
4628 // truncation first takes place. Therefore, perform a manual
4629 // truncation/sign-extension in order to ignore any truncated bits and catch
4630 // any zero-extended immediate.
4631 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4632 // sign-extending to (XLenVT -1).
4633 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4634
4635 int64_t SplatImm = SplatConst.getSExtValue();
4636
4637 if (!ValidateImm(SplatImm))
4638 return false;
4639
4640 if (Decrement)
4641 SplatImm -= 1;
4642
4643 SplatVal =
4644 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4645 return true;
4646}
4647
4649 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4650 [](int64_t Imm) { return isInt<5>(Imm); });
4651}
4652
4654 return selectVSplatImmHelper(
4655 N, SplatVal, *CurDAG, *Subtarget,
4656 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4657 /*Decrement=*/true);
4658}
4659
4661 return selectVSplatImmHelper(
4662 N, SplatVal, *CurDAG, *Subtarget,
4663 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4664 /*Decrement=*/false);
4665}
4666
4668 SDValue &SplatVal) {
4669 return selectVSplatImmHelper(
4670 N, SplatVal, *CurDAG, *Subtarget,
4671 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4672 /*Decrement=*/true);
4673}
4674
4676 SDValue &SplatVal) {
4677 return selectVSplatImmHelper(
4678 N, SplatVal, *CurDAG, *Subtarget,
4679 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4680}
4681
4684 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4685}
4686
4688 auto IsExtOrTrunc = [](SDValue N) {
4689 switch (N->getOpcode()) {
4690 case ISD::SIGN_EXTEND:
4691 case ISD::ZERO_EXTEND:
4692 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4693 // inactive elements will be undef.
4694 case RISCVISD::TRUNCATE_VECTOR_VL:
4695 case RISCVISD::VSEXT_VL:
4696 case RISCVISD::VZEXT_VL:
4697 return true;
4698 default:
4699 return false;
4700 }
4701 };
4702
4703 // We can have multiple nested nodes, so unravel them all if needed.
4704 while (IsExtOrTrunc(N)) {
4705 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4706 return false;
4707 N = N->getOperand(0);
4708 }
4709
4710 return selectVSplat(N, SplatVal);
4711}
4712
4714 // Allow bitcasts from XLenVT -> FP.
4715 if (N.getOpcode() == ISD::BITCAST &&
4716 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4717 Imm = N.getOperand(0);
4718 return true;
4719 }
4720 // Allow moves from XLenVT to FP.
4721 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4722 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4723 Imm = N.getOperand(0);
4724 return true;
4725 }
4726
4727 // Otherwise, look for FP constants that can materialized with scalar int.
4729 if (!CFP)
4730 return false;
4731 const APFloat &APF = CFP->getValueAPF();
4732 // td can handle +0.0 already.
4733 if (APF.isPosZero())
4734 return false;
4735
4736 MVT VT = CFP->getSimpleValueType(0);
4737
4738 MVT XLenVT = Subtarget->getXLenVT();
4739 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4740 assert(APF.isNegZero() && "Unexpected constant.");
4741 return false;
4742 }
4743 SDLoc DL(N);
4744 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4745 *Subtarget);
4746 return true;
4747}
4748
4750 SDValue &Imm) {
4751 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4752 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4753
4754 if (!isInt<5>(ImmVal))
4755 return false;
4756
4757 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4758 Subtarget->getXLenVT());
4759 return true;
4760 }
4761
4762 return false;
4763}
4764
4765// Match XOR with a VMSET_VL operand. Return the other operand.
4767 if (N.getOpcode() != ISD::XOR)
4768 return false;
4769
4770 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4771 Res = N.getOperand(1);
4772 return true;
4773 }
4774
4775 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4776 Res = N.getOperand(0);
4777 return true;
4778 }
4779
4780 return false;
4781}
4782
4783// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4784// matches the parent's VL. Return the other operand of the VMXOR_VL.
4786 SDValue &Res) {
4787 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4788 return false;
4789
4790 assert(Parent &&
4791 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4792 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4793 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4794 "Unexpected parent");
4795
4796 // The VL should match the parent.
4797 if (Parent->getOperand(2) != N->getOperand(2))
4798 return false;
4799
4800 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4801 Res = N.getOperand(1);
4802 return true;
4803 }
4804
4805 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4806 Res = N.getOperand(0);
4807 return true;
4808 }
4809
4810 return false;
4811}
4812
4813// Try to remove sext.w if the input is a W instruction or can be made into
4814// a W instruction cheaply.
4815bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4816 // Look for the sext.w pattern, addiw rd, rs1, 0.
4817 if (N->getMachineOpcode() != RISCV::ADDIW ||
4818 !isNullConstant(N->getOperand(1)))
4819 return false;
4820
4821 SDValue N0 = N->getOperand(0);
4822 if (!N0.isMachineOpcode())
4823 return false;
4824
4825 switch (N0.getMachineOpcode()) {
4826 default:
4827 break;
4828 case RISCV::ADD:
4829 case RISCV::ADDI:
4830 case RISCV::SUB:
4831 case RISCV::MUL:
4832 case RISCV::SLLI: {
4833 // Convert sext.w+add/sub/mul to their W instructions. This will create
4834 // a new independent instruction. This improves latency.
4835 unsigned Opc;
4836 switch (N0.getMachineOpcode()) {
4837 default:
4838 llvm_unreachable("Unexpected opcode!");
4839 case RISCV::ADD: Opc = RISCV::ADDW; break;
4840 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4841 case RISCV::SUB: Opc = RISCV::SUBW; break;
4842 case RISCV::MUL: Opc = RISCV::MULW; break;
4843 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4844 }
4845
4846 SDValue N00 = N0.getOperand(0);
4847 SDValue N01 = N0.getOperand(1);
4848
4849 // Shift amount needs to be uimm5.
4850 if (N0.getMachineOpcode() == RISCV::SLLI &&
4851 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4852 break;
4853
4854 SDNode *Result =
4855 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4856 N00, N01);
4857 ReplaceUses(N, Result);
4858 return true;
4859 }
4860 case RISCV::ADDW:
4861 case RISCV::ADDIW:
4862 case RISCV::SUBW:
4863 case RISCV::MULW:
4864 case RISCV::SLLIW:
4865 case RISCV::PACKW:
4866 case RISCV::TH_MULAW:
4867 case RISCV::TH_MULAH:
4868 case RISCV::TH_MULSW:
4869 case RISCV::TH_MULSH:
4870 if (N0.getValueType() == MVT::i32)
4871 break;
4872
4873 // Result is already sign extended just remove the sext.w.
4874 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4875 ReplaceUses(N, N0.getNode());
4876 return true;
4877 }
4878
4879 return false;
4880}
4881
4882static bool usesAllOnesMask(SDValue MaskOp) {
4883 const auto IsVMSet = [](unsigned Opc) {
4884 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4885 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4886 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4887 Opc == RISCV::PseudoVMSET_M_B8;
4888 };
4889
4890 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4891 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4892 // assume that it's all-ones? Same applies to its VL.
4893 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4894}
4895
4896static bool isImplicitDef(SDValue V) {
4897 if (!V.isMachineOpcode())
4898 return false;
4899 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4900 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4901 if (!isImplicitDef(V.getOperand(I)))
4902 return false;
4903 return true;
4904 }
4905 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4906}
4907
4908// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4909// corresponding "unmasked" pseudo versions.
4910bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4911 const RISCV::RISCVMaskedPseudoInfo *I =
4912 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4913 if (!I)
4914 return false;
4915
4916 unsigned MaskOpIdx = I->MaskOpIdx;
4917 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4918 return false;
4919
4920 // There are two classes of pseudos in the table - compares and
4921 // everything else. See the comment on RISCVMaskedPseudo for details.
4922 const unsigned Opc = I->UnmaskedPseudo;
4923 const MCInstrDesc &MCID = TII->get(Opc);
4924 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4925
4926 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4927 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4928
4929 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4931 "Unmasked pseudo has policy but masked pseudo doesn't?");
4932 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4933 "Unexpected pseudo structure");
4934 assert(!(HasPassthru && !MaskedHasPassthru) &&
4935 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4936
4938 // Skip the passthru operand at index 0 if the unmasked don't have one.
4939 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4940 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4941 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4942 bool HasChainOp =
4943 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4944 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4945 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4946 // Skip the mask
4947 SDValue Op = N->getOperand(I);
4948 if (I == MaskOpIdx)
4949 continue;
4950 if (DropPolicy && I == LastOpNum)
4951 continue;
4952 Ops.push_back(Op);
4953 }
4954
4955 MachineSDNode *Result =
4956 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4957
4958 if (!N->memoperands_empty())
4959 CurDAG->setNodeMemRefs(Result, N->memoperands());
4960
4961 Result->setFlags(N->getFlags());
4962 ReplaceUses(N, Result);
4963
4964 return true;
4965}
4966
4967/// If our passthru is an implicit_def, use noreg instead. This side
4968/// steps issues with MachineCSE not being able to CSE expressions with
4969/// IMPLICIT_DEF operands while preserving the semantic intent. See
4970/// pr64282 for context. Note that this transform is the last one
4971/// performed at ISEL DAG to DAG.
4972bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4973 bool MadeChange = false;
4974 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4975
4976 while (Position != CurDAG->allnodes_begin()) {
4977 SDNode *N = &*--Position;
4978 if (N->use_empty() || !N->isMachineOpcode())
4979 continue;
4980
4981 const unsigned Opc = N->getMachineOpcode();
4982 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4984 !isImplicitDef(N->getOperand(0)))
4985 continue;
4986
4988 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4989 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4990 SDValue Op = N->getOperand(I);
4991 Ops.push_back(Op);
4992 }
4993
4994 MachineSDNode *Result =
4995 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4996 Result->setFlags(N->getFlags());
4997 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4998 ReplaceUses(N, Result);
4999 MadeChange = true;
5000 }
5001 return MadeChange;
5002}
5003
5004
5005// This pass converts a legalized DAG into a RISCV-specific DAG, ready
5006// for instruction scheduling.
5008 CodeGenOptLevel OptLevel) {
5009 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
5010}
5011
5013
5018
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
static std::pair< SDValue, SDValue > extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
static bool isApplicableToPLIOrPLUI(int Val)
#define INST_ALL_NF_CASE(NAME)
cl::opt< uint32_t > PreferredLandingPadLabel("riscv-landing-pad-label", cl::ReallyHidden, cl::desc("Use preferred fixed label for all labels"))
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
bool isPosZero() const
Definition APFloat.h:1549
bool isNegZero() const
Definition APFloat.h:1550
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI bool isSplat(unsigned SplatSizeInBits) const
Check if the APInt consists of a repeated bit pattern.
Definition APInt.cpp:631
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:475
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:426
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
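This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.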
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.