LLVM 23.0.0git
RISCVVLOptimizer.cpp
Go to the documentation of this file.
1//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass reduces the VL where possible at the MI level, before VSETVLI
10// instructions are inserted.
11//
12// The purpose of this optimization is to make the VL argument, for instructions
13// that have a VL argument, as small as possible.
14//
15// This is split into a sparse dataflow analysis where we determine what VL is
16// demanded by each instruction first, and then afterwards try to reduce the VL
17// of each instruction if it demands less than its VL operand.
18//
19// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
20// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
21// Optimizer", which is available on YouTube at
22// https://www.youtube.com/watch?v=Mfb5fRSdJAc
23//
24// The slides for the talk are available at
25// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
26//
27//===---------------------------------------------------------------------===//
28
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "riscv-vl-optimizer"
39#define PASS_NAME "RISC-V VL Optimizer"
40
41namespace {
42
43/// Wrapper around MachineOperand that defaults to immediate 0.
44struct DemandedVL {
46 DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
47 DemandedVL(MachineOperand VL) : VL(VL) {}
48 static DemandedVL vlmax() {
50 }
51 bool operator!=(const DemandedVL &Other) const {
52 return !VL.isIdenticalTo(Other.VL);
53 }
54
55 DemandedVL max(const DemandedVL &X) const {
56 if (RISCV::isVLKnownLE(VL, X.VL))
57 return X;
58 if (RISCV::isVLKnownLE(X.VL, VL))
59 return *this;
60 return DemandedVL::vlmax();
61 }
62};
63
64class RISCVVLOptimizer : public MachineFunctionPass {
66 const MachineDominatorTree *MDT;
67 const TargetInstrInfo *TII;
68
69public:
70 static char ID;
71
72 RISCVVLOptimizer() : MachineFunctionPass(ID) {}
73
74 bool runOnMachineFunction(MachineFunction &MF) override;
75
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
77 AU.setPreservesCFG();
80 }
81
82 StringRef getPassName() const override { return PASS_NAME; }
83
84private:
85 DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
86 /// Returns true if the users of \p MI have compatible EEWs and SEWs.
87 bool checkUsers(const MachineInstr &MI) const;
88 bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
89 bool isSupportedInstr(const MachineInstr &MI) const;
90 bool isCandidate(const MachineInstr &MI) const;
91 void transfer(const MachineInstr &MI);
92
93 /// For a given instruction, records what elements of it are demanded by
94 /// downstream users.
97
98 /// \returns all vector virtual registers that \p MI uses.
99 auto virtual_vec_uses(const MachineInstr &MI) const {
100 return make_filter_range(MI.uses(), [this](const MachineOperand &MO) {
101 return MO.isReg() && MO.getReg().isVirtual() &&
102 RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg()));
103 });
104 }
105};
106
107/// Represents the EMUL and EEW of a MachineOperand.
108struct OperandInfo {
109 // Represent as 1,2,4,8, ... and fractional indicator. This is because
110 // EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
111 // For example, a mask operand can have an EMUL less than MF8.
112 // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
113 std::optional<std::pair<unsigned, bool>> EMUL;
114
115 unsigned Log2EEW;
116
117 OperandInfo(RISCVVType::VLMUL EMUL, unsigned Log2EEW)
118 : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
119
120 OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
121 : EMUL(EMUL), Log2EEW(Log2EEW) {}
122
123 OperandInfo(unsigned Log2EEW) : Log2EEW(Log2EEW) {}
124
125 OperandInfo() = delete;
126
127 /// Return true if the EMUL and EEW produced by \p Def is compatible with the
128 /// EMUL and EEW used by \p User.
129 static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
130 if (Def.Log2EEW != User.Log2EEW)
131 return false;
132 if (User.EMUL && Def.EMUL != User.EMUL)
133 return false;
134 return true;
135 }
136
137 void print(raw_ostream &OS) const {
138 if (EMUL) {
139 OS << "EMUL: m";
140 if (EMUL->second)
141 OS << "f";
142 OS << EMUL->first;
143 } else
144 OS << "EMUL: none\n";
145 OS << ", EEW: " << (1 << Log2EEW);
146 }
147};
148
149} // end anonymous namespace
150
151char RISCVVLOptimizer::ID = 0;
152INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
155
157 return new RISCVVLOptimizer();
158}
159
160[[maybe_unused]]
161static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
162 OI.print(OS);
163 return OS;
164}
165
166[[maybe_unused]]
168 const std::optional<OperandInfo> &OI) {
169 if (OI)
170 OI->print(OS);
171 else
172 OS << "nullopt";
173 return OS;
174}
175
176/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
177/// SEW are from the TSFlags of MI.
178static std::pair<unsigned, bool>
180 RISCVVType::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
181 auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
182 unsigned MILog2SEW =
183 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
184
185 // Mask instructions will have 0 as the SEW operand. But the LMUL of these
186 // instructions is calculated is as if the SEW operand was 3 (e8).
187 if (MILog2SEW == 0)
188 MILog2SEW = 3;
189
190 unsigned MISEW = 1 << MILog2SEW;
191
192 unsigned EEW = 1 << Log2EEW;
193 // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
194 // to put fraction in simplest form.
195 unsigned Num = EEW, Denom = MISEW;
196 int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
197 : std::gcd(Num * MILMUL, Denom);
198 Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
199 Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
200 return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
201}
202
203/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
204/// SEW comes from TSFlags of MI.
205static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
206 const MachineInstr &MI,
207 const MachineOperand &MO) {
208 unsigned MILog2SEW =
209 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
210
211 if (MO.getOperandNo() == 0)
212 return MILog2SEW;
213
214 unsigned MISEW = 1 << MILog2SEW;
215 unsigned EEW = MISEW / Factor;
216 unsigned Log2EEW = Log2_32(EEW);
217
218 return Log2EEW;
219}
220
// Expand to the case labels for every NF=2..8 segment variant of a vector
// segment load/store pseudo with the given mnemonic prefix and operand EEW.
// The I##EEW form is used for the indexed variants below, whose opcode names
// embed the index EEW (e.g. VSUXSEG2EI8_V).
#define VSEG_CASES(Prefix, EEW)                                                \
  RISCV::Prefix##SEG2E##EEW##_V:                                               \
  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
  case RISCV::Prefix##SEG8E##EEW##_V
#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
233
234static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) {
235 const MachineInstr &MI = *MO.getParent();
236 const MCInstrDesc &Desc = MI.getDesc();
238 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
239 assert(RVV && "Could not find MI in PseudoTable");
240
241 // MI has a SEW associated with it. The RVV specification defines
242 // the EEW of each operand and definition in relation to MI.SEW.
243 unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
244
245 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
246 const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags);
247
248 bool IsMODef = MO.getOperandNo() == 0 ||
249 (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs());
250
251 // All mask operands have EEW=1
252 const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()];
253 if (Info.OperandType == MCOI::OPERAND_REGISTER &&
254 Info.RegClass == RISCV::VMV0RegClassID)
255 return 0;
256
257 // switch against BaseInstr to reduce number of cases that need to be
258 // considered.
259 switch (RVV->BaseInstr) {
260
261 // 6. Configuration-Setting Instructions
262 // Configuration setting instructions do not read or write vector registers
263 case RISCV::VSETIVLI:
264 case RISCV::VSETVL:
265 case RISCV::VSETVLI:
266 llvm_unreachable("Configuration setting instructions do not read or write "
267 "vector registers");
268
269 // Vector Loads and Stores
270 // Vector Unit-Stride Instructions
271 // Vector Strided Instructions
272 /// Dest EEW encoded in the instruction
273 case RISCV::VLM_V:
274 case RISCV::VSM_V:
275 return 0;
276 case RISCV::VLE8_V:
277 case RISCV::VSE8_V:
278 case RISCV::VLSE8_V:
279 case RISCV::VSSE8_V:
280 case VSSEG_CASES(8):
281 case VSSSEG_CASES(8):
282 return 3;
283 case RISCV::VLE16_V:
284 case RISCV::VSE16_V:
285 case RISCV::VLSE16_V:
286 case RISCV::VSSE16_V:
287 case VSSEG_CASES(16):
288 case VSSSEG_CASES(16):
289 return 4;
290 case RISCV::VLE32_V:
291 case RISCV::VSE32_V:
292 case RISCV::VLSE32_V:
293 case RISCV::VSSE32_V:
294 case VSSEG_CASES(32):
295 case VSSSEG_CASES(32):
296 return 5;
297 case RISCV::VLE64_V:
298 case RISCV::VSE64_V:
299 case RISCV::VLSE64_V:
300 case RISCV::VSSE64_V:
301 case VSSEG_CASES(64):
302 case VSSSEG_CASES(64):
303 return 6;
304
305 // Vector Indexed Instructions
306 // vs(o|u)xei<eew>.v
307 // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
308 case RISCV::VLUXEI8_V:
309 case RISCV::VLOXEI8_V:
310 case RISCV::VSUXEI8_V:
311 case RISCV::VSOXEI8_V:
312 case VSUXSEG_CASES(8):
313 case VSOXSEG_CASES(8): {
314 if (MO.getOperandNo() == 0)
315 return MILog2SEW;
316 return 3;
317 }
318 case RISCV::VLUXEI16_V:
319 case RISCV::VLOXEI16_V:
320 case RISCV::VSUXEI16_V:
321 case RISCV::VSOXEI16_V:
322 case VSUXSEG_CASES(16):
323 case VSOXSEG_CASES(16): {
324 if (MO.getOperandNo() == 0)
325 return MILog2SEW;
326 return 4;
327 }
328 case RISCV::VLUXEI32_V:
329 case RISCV::VLOXEI32_V:
330 case RISCV::VSUXEI32_V:
331 case RISCV::VSOXEI32_V:
332 case VSUXSEG_CASES(32):
333 case VSOXSEG_CASES(32): {
334 if (MO.getOperandNo() == 0)
335 return MILog2SEW;
336 return 5;
337 }
338 case RISCV::VLUXEI64_V:
339 case RISCV::VLOXEI64_V:
340 case RISCV::VSUXEI64_V:
341 case RISCV::VSOXEI64_V:
342 case VSUXSEG_CASES(64):
343 case VSOXSEG_CASES(64): {
344 if (MO.getOperandNo() == 0)
345 return MILog2SEW;
346 return 6;
347 }
348
349 // Vector Integer Arithmetic Instructions
350 // Vector Single-Width Integer Add and Subtract
351 case RISCV::VADD_VI:
352 case RISCV::VADD_VV:
353 case RISCV::VADD_VX:
354 case RISCV::VSUB_VV:
355 case RISCV::VSUB_VX:
356 case RISCV::VRSUB_VI:
357 case RISCV::VRSUB_VX:
358 // Vector Bitwise Logical Instructions
359 // Vector Single-Width Shift Instructions
360 // EEW=SEW.
361 case RISCV::VAND_VI:
362 case RISCV::VAND_VV:
363 case RISCV::VAND_VX:
364 case RISCV::VOR_VI:
365 case RISCV::VOR_VV:
366 case RISCV::VOR_VX:
367 case RISCV::VXOR_VI:
368 case RISCV::VXOR_VV:
369 case RISCV::VXOR_VX:
370 case RISCV::VSLL_VI:
371 case RISCV::VSLL_VV:
372 case RISCV::VSLL_VX:
373 case RISCV::VSRL_VI:
374 case RISCV::VSRL_VV:
375 case RISCV::VSRL_VX:
376 case RISCV::VSRA_VI:
377 case RISCV::VSRA_VV:
378 case RISCV::VSRA_VX:
379 // Vector Integer Min/Max Instructions
380 // EEW=SEW.
381 case RISCV::VMINU_VV:
382 case RISCV::VMINU_VX:
383 case RISCV::VMIN_VV:
384 case RISCV::VMIN_VX:
385 case RISCV::VMAXU_VV:
386 case RISCV::VMAXU_VX:
387 case RISCV::VMAX_VV:
388 case RISCV::VMAX_VX:
389 // Vector Single-Width Integer Multiply Instructions
390 // Source and Dest EEW=SEW.
391 case RISCV::VMUL_VV:
392 case RISCV::VMUL_VX:
393 case RISCV::VMULH_VV:
394 case RISCV::VMULH_VX:
395 case RISCV::VMULHU_VV:
396 case RISCV::VMULHU_VX:
397 case RISCV::VMULHSU_VV:
398 case RISCV::VMULHSU_VX:
399 // Vector Integer Divide Instructions
400 // EEW=SEW.
401 case RISCV::VDIVU_VV:
402 case RISCV::VDIVU_VX:
403 case RISCV::VDIV_VV:
404 case RISCV::VDIV_VX:
405 case RISCV::VREMU_VV:
406 case RISCV::VREMU_VX:
407 case RISCV::VREM_VV:
408 case RISCV::VREM_VX:
409 // Vector Single-Width Integer Multiply-Add Instructions
410 // EEW=SEW.
411 case RISCV::VMACC_VV:
412 case RISCV::VMACC_VX:
413 case RISCV::VNMSAC_VV:
414 case RISCV::VNMSAC_VX:
415 case RISCV::VMADD_VV:
416 case RISCV::VMADD_VX:
417 case RISCV::VNMSUB_VV:
418 case RISCV::VNMSUB_VX:
419 // Vector Integer Merge Instructions
420 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
421 // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
422 // before this switch.
423 case RISCV::VMERGE_VIM:
424 case RISCV::VMERGE_VVM:
425 case RISCV::VMERGE_VXM:
426 case RISCV::VADC_VIM:
427 case RISCV::VADC_VVM:
428 case RISCV::VADC_VXM:
429 case RISCV::VSBC_VVM:
430 case RISCV::VSBC_VXM:
431 // Vector Integer Move Instructions
432 // Vector Fixed-Point Arithmetic Instructions
433 // Vector Single-Width Saturating Add and Subtract
434 // Vector Single-Width Averaging Add and Subtract
435 // EEW=SEW.
436 case RISCV::VMV_V_I:
437 case RISCV::VMV_V_V:
438 case RISCV::VMV_V_X:
439 case RISCV::VSADDU_VI:
440 case RISCV::VSADDU_VV:
441 case RISCV::VSADDU_VX:
442 case RISCV::VSADD_VI:
443 case RISCV::VSADD_VV:
444 case RISCV::VSADD_VX:
445 case RISCV::VSSUBU_VV:
446 case RISCV::VSSUBU_VX:
447 case RISCV::VSSUB_VV:
448 case RISCV::VSSUB_VX:
449 case RISCV::VAADDU_VV:
450 case RISCV::VAADDU_VX:
451 case RISCV::VAADD_VV:
452 case RISCV::VAADD_VX:
453 case RISCV::VASUBU_VV:
454 case RISCV::VASUBU_VX:
455 case RISCV::VASUB_VV:
456 case RISCV::VASUB_VX:
457 // Vector Single-Width Fractional Multiply with Rounding and Saturation
458 // EEW=SEW. The instruction produces 2*SEW product internally but
459 // saturates to fit into SEW bits.
460 case RISCV::VSMUL_VV:
461 case RISCV::VSMUL_VX:
462 // Vector Single-Width Scaling Shift Instructions
463 // EEW=SEW.
464 case RISCV::VSSRL_VI:
465 case RISCV::VSSRL_VV:
466 case RISCV::VSSRL_VX:
467 case RISCV::VSSRA_VI:
468 case RISCV::VSSRA_VV:
469 case RISCV::VSSRA_VX:
470 // Vector Permutation Instructions
471 // Integer Scalar Move Instructions
472 // Floating-Point Scalar Move Instructions
473 // EEW=SEW.
474 case RISCV::VMV_X_S:
475 case RISCV::VMV_S_X:
476 case RISCV::VFMV_F_S:
477 case RISCV::VFMV_S_F:
478 // Vector Slide Instructions
479 // EEW=SEW.
480 case RISCV::VSLIDEUP_VI:
481 case RISCV::VSLIDEUP_VX:
482 case RISCV::VSLIDEDOWN_VI:
483 case RISCV::VSLIDEDOWN_VX:
484 case RISCV::VSLIDE1UP_VX:
485 case RISCV::VFSLIDE1UP_VF:
486 case RISCV::VSLIDE1DOWN_VX:
487 case RISCV::VFSLIDE1DOWN_VF:
488 // Vector Register Gather Instructions
489 // EEW=SEW. For mask operand, EEW=1.
490 case RISCV::VRGATHER_VI:
491 case RISCV::VRGATHER_VV:
492 case RISCV::VRGATHER_VX:
493 // Vector Element Index Instruction
494 case RISCV::VID_V:
495 // Vector Single-Width Floating-Point Add/Subtract Instructions
496 case RISCV::VFADD_VF:
497 case RISCV::VFADD_VV:
498 case RISCV::VFSUB_VF:
499 case RISCV::VFSUB_VV:
500 case RISCV::VFRSUB_VF:
501 // Vector Single-Width Floating-Point Multiply/Divide Instructions
502 case RISCV::VFMUL_VF:
503 case RISCV::VFMUL_VV:
504 case RISCV::VFDIV_VF:
505 case RISCV::VFDIV_VV:
506 case RISCV::VFRDIV_VF:
507 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
508 case RISCV::VFMACC_VV:
509 case RISCV::VFMACC_VF:
510 case RISCV::VFNMACC_VV:
511 case RISCV::VFNMACC_VF:
512 case RISCV::VFMSAC_VV:
513 case RISCV::VFMSAC_VF:
514 case RISCV::VFNMSAC_VV:
515 case RISCV::VFNMSAC_VF:
516 case RISCV::VFMADD_VV:
517 case RISCV::VFMADD_VF:
518 case RISCV::VFNMADD_VV:
519 case RISCV::VFNMADD_VF:
520 case RISCV::VFMSUB_VV:
521 case RISCV::VFMSUB_VF:
522 case RISCV::VFNMSUB_VV:
523 case RISCV::VFNMSUB_VF:
524 // Vector Floating-Point Square-Root Instruction
525 case RISCV::VFSQRT_V:
526 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
527 case RISCV::VFRSQRT7_V:
528 // Vector Floating-Point Reciprocal Estimate Instruction
529 case RISCV::VFREC7_V:
530 // Vector Floating-Point MIN/MAX Instructions
531 case RISCV::VFMIN_VF:
532 case RISCV::VFMIN_VV:
533 case RISCV::VFMAX_VF:
534 case RISCV::VFMAX_VV:
535 // Vector Floating-Point Sign-Injection Instructions
536 case RISCV::VFSGNJ_VF:
537 case RISCV::VFSGNJ_VV:
538 case RISCV::VFSGNJN_VV:
539 case RISCV::VFSGNJN_VF:
540 case RISCV::VFSGNJX_VF:
541 case RISCV::VFSGNJX_VV:
542 // Vector Floating-Point Classify Instruction
543 case RISCV::VFCLASS_V:
544 // Vector Floating-Point Move Instruction
545 case RISCV::VFMV_V_F:
546 // Single-Width Floating-Point/Integer Type-Convert Instructions
547 case RISCV::VFCVT_XU_F_V:
548 case RISCV::VFCVT_X_F_V:
549 case RISCV::VFCVT_RTZ_XU_F_V:
550 case RISCV::VFCVT_RTZ_X_F_V:
551 case RISCV::VFCVT_F_XU_V:
552 case RISCV::VFCVT_F_X_V:
553 // Vector Floating-Point Merge Instruction
554 case RISCV::VFMERGE_VFM:
555 // Vector count population in mask vcpop.m
556 // vfirst find-first-set mask bit
557 case RISCV::VCPOP_M:
558 case RISCV::VFIRST_M:
559 // Vector Bit-manipulation Instructions (Zvbb)
560 // Vector And-Not
561 case RISCV::VANDN_VV:
562 case RISCV::VANDN_VX:
563 // Vector Reverse Bits in Elements
564 case RISCV::VBREV_V:
565 // Vector Reverse Bits in Bytes
566 case RISCV::VBREV8_V:
567 // Vector Reverse Bytes
568 case RISCV::VREV8_V:
569 // Vector Count Leading Zeros
570 case RISCV::VCLZ_V:
571 // Vector Count Trailing Zeros
572 case RISCV::VCTZ_V:
573 // Vector Population Count
574 case RISCV::VCPOP_V:
575 // Vector Rotate Left
576 case RISCV::VROL_VV:
577 case RISCV::VROL_VX:
578 // Vector Rotate Right
579 case RISCV::VROR_VI:
580 case RISCV::VROR_VV:
581 case RISCV::VROR_VX:
582 // Vector Carry-less Multiplication Instructions (Zvbc)
583 // Vector Carry-less Multiply
584 case RISCV::VCLMUL_VV:
585 case RISCV::VCLMUL_VX:
586 // Vector Carry-less Multiply Return High Half
587 case RISCV::VCLMULH_VV:
588 case RISCV::VCLMULH_VX:
589
590 // Zvabd
591 case RISCV::VABS_V:
592 case RISCV::VABD_VV:
593 case RISCV::VABDU_VV:
594
595 // XRivosVizip
596 case RISCV::RI_VZIPEVEN_VV:
597 case RISCV::RI_VZIPODD_VV:
598 case RISCV::RI_VZIP2A_VV:
599 case RISCV::RI_VZIP2B_VV:
600 case RISCV::RI_VUNZIP2A_VV:
601 case RISCV::RI_VUNZIP2B_VV:
602 return MILog2SEW;
603
604 // Vector Widening Shift Left Logical (Zvbb)
605 case RISCV::VWSLL_VI:
606 case RISCV::VWSLL_VX:
607 case RISCV::VWSLL_VV:
608 // Vector Widening Integer Add/Subtract
609 // Def uses EEW=2*SEW . Operands use EEW=SEW.
610 case RISCV::VWADDU_VV:
611 case RISCV::VWADDU_VX:
612 case RISCV::VWSUBU_VV:
613 case RISCV::VWSUBU_VX:
614 case RISCV::VWADD_VV:
615 case RISCV::VWADD_VX:
616 case RISCV::VWSUB_VV:
617 case RISCV::VWSUB_VX:
618 // Vector Widening Integer Multiply Instructions
619 // Destination EEW=2*SEW. Source EEW=SEW.
620 case RISCV::VWMUL_VV:
621 case RISCV::VWMUL_VX:
622 case RISCV::VWMULSU_VV:
623 case RISCV::VWMULSU_VX:
624 case RISCV::VWMULU_VV:
625 case RISCV::VWMULU_VX:
626 // Vector Widening Integer Multiply-Add Instructions
627 // Destination EEW=2*SEW. Source EEW=SEW.
628 // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
629 // is then added to the 2*SEW-bit Dest. These instructions never have a
630 // passthru operand.
631 case RISCV::VWMACCU_VV:
632 case RISCV::VWMACCU_VX:
633 case RISCV::VWMACC_VV:
634 case RISCV::VWMACC_VX:
635 case RISCV::VWMACCSU_VV:
636 case RISCV::VWMACCSU_VX:
637 case RISCV::VWMACCUS_VX:
638 // Vector Widening Floating-Point Fused Multiply-Add Instructions
639 case RISCV::VFWMACC_VF:
640 case RISCV::VFWMACC_VV:
641 case RISCV::VFWNMACC_VF:
642 case RISCV::VFWNMACC_VV:
643 case RISCV::VFWMSAC_VF:
644 case RISCV::VFWMSAC_VV:
645 case RISCV::VFWNMSAC_VF:
646 case RISCV::VFWNMSAC_VV:
647 case RISCV::VFWMACCBF16_VV:
648 case RISCV::VFWMACCBF16_VF:
649 // Vector Widening Floating-Point Add/Subtract Instructions
650 // Dest EEW=2*SEW. Source EEW=SEW.
651 case RISCV::VFWADD_VV:
652 case RISCV::VFWADD_VF:
653 case RISCV::VFWSUB_VV:
654 case RISCV::VFWSUB_VF:
655 // Vector Widening Floating-Point Multiply
656 case RISCV::VFWMUL_VF:
657 case RISCV::VFWMUL_VV:
658 // Widening Floating-Point/Integer Type-Convert Instructions
659 case RISCV::VFWCVT_XU_F_V:
660 case RISCV::VFWCVT_X_F_V:
661 case RISCV::VFWCVT_RTZ_XU_F_V:
662 case RISCV::VFWCVT_RTZ_X_F_V:
663 case RISCV::VFWCVT_F_XU_V:
664 case RISCV::VFWCVT_F_X_V:
665 case RISCV::VFWCVT_F_F_V:
666 case RISCV::VFWCVTBF16_F_F_V:
667 // Zvabd
668 case RISCV::VWABDA_VV:
669 case RISCV::VWABDAU_VV:
670 return IsMODef ? MILog2SEW + 1 : MILog2SEW;
671
672 // Def and Op1 uses EEW=2*SEW. Op2 uses EEW=SEW.
673 case RISCV::VWADDU_WV:
674 case RISCV::VWADDU_WX:
675 case RISCV::VWSUBU_WV:
676 case RISCV::VWSUBU_WX:
677 case RISCV::VWADD_WV:
678 case RISCV::VWADD_WX:
679 case RISCV::VWSUB_WV:
680 case RISCV::VWSUB_WX:
681 // Vector Widening Floating-Point Add/Subtract Instructions
682 case RISCV::VFWADD_WF:
683 case RISCV::VFWADD_WV:
684 case RISCV::VFWSUB_WF:
685 case RISCV::VFWSUB_WV: {
686 bool IsOp1 = (HasPassthru && !IsTied) ? MO.getOperandNo() == 2
687 : MO.getOperandNo() == 1;
688 bool TwoTimes = IsMODef || IsOp1;
689 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
690 }
691
692 // Vector Integer Extension
693 case RISCV::VZEXT_VF2:
694 case RISCV::VSEXT_VF2:
695 return getIntegerExtensionOperandEEW(2, MI, MO);
696 case RISCV::VZEXT_VF4:
697 case RISCV::VSEXT_VF4:
698 return getIntegerExtensionOperandEEW(4, MI, MO);
699 case RISCV::VZEXT_VF8:
700 case RISCV::VSEXT_VF8:
701 return getIntegerExtensionOperandEEW(8, MI, MO);
702
703 // Vector Narrowing Integer Right Shift Instructions
704 // Destination EEW=SEW, Op 1 has EEW=2*SEW. Op2 has EEW=SEW
705 case RISCV::VNSRL_WX:
706 case RISCV::VNSRL_WI:
707 case RISCV::VNSRL_WV:
708 case RISCV::VNSRA_WI:
709 case RISCV::VNSRA_WV:
710 case RISCV::VNSRA_WX:
711 // Vector Narrowing Fixed-Point Clip Instructions
712 // Destination and Op1 EEW=SEW. Op2 EEW=2*SEW.
713 case RISCV::VNCLIPU_WI:
714 case RISCV::VNCLIPU_WV:
715 case RISCV::VNCLIPU_WX:
716 case RISCV::VNCLIP_WI:
717 case RISCV::VNCLIP_WV:
718 case RISCV::VNCLIP_WX:
719 // Narrowing Floating-Point/Integer Type-Convert Instructions
720 case RISCV::VFNCVT_XU_F_W:
721 case RISCV::VFNCVT_X_F_W:
722 case RISCV::VFNCVT_RTZ_XU_F_W:
723 case RISCV::VFNCVT_RTZ_X_F_W:
724 case RISCV::VFNCVT_F_XU_W:
725 case RISCV::VFNCVT_F_X_W:
726 case RISCV::VFNCVT_F_F_W:
727 case RISCV::VFNCVT_ROD_F_F_W:
728 case RISCV::VFNCVTBF16_F_F_W: {
729 assert(!IsTied);
730 bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
731 bool TwoTimes = IsOp1;
732 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
733 }
734
735 // Vector Mask Instructions
736 // Vector Mask-Register Logical Instructions
737 // vmsbf.m set-before-first mask bit
738 // vmsif.m set-including-first mask bit
739 // vmsof.m set-only-first mask bit
740 // EEW=1
741 // We handle the cases when operand is a v0 mask operand above the switch,
742 // but these instructions may use non-v0 mask operands and need to be handled
743 // specifically.
744 case RISCV::VMAND_MM:
745 case RISCV::VMNAND_MM:
746 case RISCV::VMANDN_MM:
747 case RISCV::VMXOR_MM:
748 case RISCV::VMOR_MM:
749 case RISCV::VMNOR_MM:
750 case RISCV::VMORN_MM:
751 case RISCV::VMXNOR_MM:
752 case RISCV::VMSBF_M:
753 case RISCV::VMSIF_M:
754 case RISCV::VMSOF_M: {
755 return MILog2SEW;
756 }
757
758 // Vector Compress Instruction
759 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
760 // before this switch.
761 case RISCV::VCOMPRESS_VM:
762 return MO.getOperandNo() == 3 ? 0 : MILog2SEW;
763
764 // Vector Iota Instruction
765 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
766 // before this switch.
767 case RISCV::VIOTA_M: {
768 if (IsMODef || MO.getOperandNo() == 1)
769 return MILog2SEW;
770 return 0;
771 }
772
773 // Vector Integer Compare Instructions
774 // Dest EEW=1. Source EEW=SEW.
775 case RISCV::VMSEQ_VI:
776 case RISCV::VMSEQ_VV:
777 case RISCV::VMSEQ_VX:
778 case RISCV::VMSNE_VI:
779 case RISCV::VMSNE_VV:
780 case RISCV::VMSNE_VX:
781 case RISCV::VMSLTU_VV:
782 case RISCV::VMSLTU_VX:
783 case RISCV::VMSLT_VV:
784 case RISCV::VMSLT_VX:
785 case RISCV::VMSLEU_VV:
786 case RISCV::VMSLEU_VI:
787 case RISCV::VMSLEU_VX:
788 case RISCV::VMSLE_VV:
789 case RISCV::VMSLE_VI:
790 case RISCV::VMSLE_VX:
791 case RISCV::VMSGTU_VI:
792 case RISCV::VMSGTU_VX:
793 case RISCV::VMSGT_VI:
794 case RISCV::VMSGT_VX:
795 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
796 // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
797 case RISCV::VMADC_VIM:
798 case RISCV::VMADC_VVM:
799 case RISCV::VMADC_VXM:
800 case RISCV::VMSBC_VVM:
801 case RISCV::VMSBC_VXM:
802 // Dest EEW=1. Source EEW=SEW.
803 case RISCV::VMADC_VV:
804 case RISCV::VMADC_VI:
805 case RISCV::VMADC_VX:
806 case RISCV::VMSBC_VV:
807 case RISCV::VMSBC_VX:
808 // 13.13. Vector Floating-Point Compare Instructions
809 // Dest EEW=1. Source EEW=SEW
810 case RISCV::VMFEQ_VF:
811 case RISCV::VMFEQ_VV:
812 case RISCV::VMFNE_VF:
813 case RISCV::VMFNE_VV:
814 case RISCV::VMFLT_VF:
815 case RISCV::VMFLT_VV:
816 case RISCV::VMFLE_VF:
817 case RISCV::VMFLE_VV:
818 case RISCV::VMFGT_VF:
819 case RISCV::VMFGE_VF: {
820 if (IsMODef)
821 return 0;
822 return MILog2SEW;
823 }
824
825 // Vector Reduction Operations
826 // Vector Single-Width Integer Reduction Instructions
827 case RISCV::VREDAND_VS:
828 case RISCV::VREDMAX_VS:
829 case RISCV::VREDMAXU_VS:
830 case RISCV::VREDMIN_VS:
831 case RISCV::VREDMINU_VS:
832 case RISCV::VREDOR_VS:
833 case RISCV::VREDSUM_VS:
834 case RISCV::VREDXOR_VS:
835 // Vector Single-Width Floating-Point Reduction Instructions
836 case RISCV::VFREDMAX_VS:
837 case RISCV::VFREDMIN_VS:
838 case RISCV::VFREDOSUM_VS:
839 case RISCV::VFREDUSUM_VS: {
840 return MILog2SEW;
841 }
842
843 // Vector Widening Integer Reduction Instructions
844 // The Dest and VS1 read only element 0 for the vector register. Return
845 // 2*EEW for these. VS2 has EEW=SEW and EMUL=LMUL.
846 case RISCV::VWREDSUM_VS:
847 case RISCV::VWREDSUMU_VS:
848 // Vector Widening Floating-Point Reduction Instructions
849 case RISCV::VFWREDOSUM_VS:
850 case RISCV::VFWREDUSUM_VS: {
851 bool TwoTimes = IsMODef || MO.getOperandNo() == 3;
852 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
853 }
854
855 // Vector Register Gather with 16-bit Index Elements Instruction
856 // Dest and source data EEW=SEW. Index vector EEW=16.
857 case RISCV::VRGATHEREI16_VV: {
858 if (MO.getOperandNo() == 2)
859 return 4;
860 return MILog2SEW;
861 }
862
863 default:
864 return std::nullopt;
865 }
866}
867
868static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
869 const MachineInstr &MI = *MO.getParent();
871 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
872 assert(RVV && "Could not find MI in PseudoTable");
873
874 std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
875 if (!Log2EEW)
876 return std::nullopt;
877
878 switch (RVV->BaseInstr) {
879 // Vector Reduction Operations
880 // Vector Single-Width Integer Reduction Instructions
881 // Vector Widening Integer Reduction Instructions
882 // Vector Widening Floating-Point Reduction Instructions
883 // The Dest and VS1 only read element 0 of the vector register. Return just
884 // the EEW for these.
885 case RISCV::VREDAND_VS:
886 case RISCV::VREDMAX_VS:
887 case RISCV::VREDMAXU_VS:
888 case RISCV::VREDMIN_VS:
889 case RISCV::VREDMINU_VS:
890 case RISCV::VREDOR_VS:
891 case RISCV::VREDSUM_VS:
892 case RISCV::VREDXOR_VS:
893 case RISCV::VWREDSUM_VS:
894 case RISCV::VWREDSUMU_VS:
895 case RISCV::VFWREDOSUM_VS:
896 case RISCV::VFWREDUSUM_VS:
897 if (MO.getOperandNo() != 2)
898 return OperandInfo(*Log2EEW);
899 break;
900 };
901
902 // All others have EMUL=EEW/SEW*LMUL
903 return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
904}
905
906static bool isTupleInsertInstr(const MachineInstr &MI);
907
908/// Return true if we can reason about demanded VLs elementwise for \p MI.
909bool RISCVVLOptimizer::isSupportedInstr(const MachineInstr &MI) const {
910 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
911 return true;
912
913 unsigned RVVOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
914 if (!RVVOpc)
915 return false;
916
917 assert(!(MI.getNumExplicitDefs() == 0 && !MI.mayStore() &&
918 !RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags)) &&
919 "No defs but elements don't depend on VL?");
920
921 // TODO: Reduce vl for vmv.s.x and vfmv.s.f. Currently this introduces more vl
922 // toggles, we need to extend PRE in RISCVInsertVSETVLI first.
923 if (RVVOpc == RISCV::VMV_S_X || RVVOpc == RISCV::VFMV_S_F)
924 return false;
925
926 if (RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags))
927 return false;
928
929 if (MI.mayStore())
930 return false;
931
932 return true;
933}
934
935/// Return true if MO is a vector operand but is used as a scalar operand.
937 const MachineInstr *MI = MO.getParent();
939 RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
940
941 if (!RVV)
942 return false;
943
944 switch (RVV->BaseInstr) {
945 // Reductions only use vs1[0] of vs1
946 case RISCV::VREDAND_VS:
947 case RISCV::VREDMAX_VS:
948 case RISCV::VREDMAXU_VS:
949 case RISCV::VREDMIN_VS:
950 case RISCV::VREDMINU_VS:
951 case RISCV::VREDOR_VS:
952 case RISCV::VREDSUM_VS:
953 case RISCV::VREDXOR_VS:
954 case RISCV::VWREDSUM_VS:
955 case RISCV::VWREDSUMU_VS:
956 case RISCV::VFREDMAX_VS:
957 case RISCV::VFREDMIN_VS:
958 case RISCV::VFREDOSUM_VS:
959 case RISCV::VFREDUSUM_VS:
960 case RISCV::VFWREDOSUM_VS:
961 case RISCV::VFWREDUSUM_VS:
962 return MO.getOperandNo() == 3;
963 case RISCV::VMV_X_S:
964 case RISCV::VFMV_F_S:
965 return MO.getOperandNo() == 1;
966 default:
967 return false;
968 }
969}
970
971bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
972 const MCInstrDesc &Desc = MI.getDesc();
973 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
974 return false;
975
976 if (MI.getNumExplicitDefs() != 1)
977 return false;
978
979 // Some instructions have implicit defs e.g. $vxsat. If they might be read
980 // later then we can't reduce VL.
981 if (!MI.allImplicitDefsAreDead()) {
982 LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n");
983 return false;
984 }
985
986 if (MI.mayRaiseFPException()) {
987 LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
988 return false;
989 }
990
991 for (const MachineMemOperand *MMO : MI.memoperands()) {
992 if (MMO->isVolatile()) {
993 LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n");
994 return false;
995 }
996 }
997
998 if (!isSupportedInstr(MI)) {
999 LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
1000 << MI);
1001 return false;
1002 }
1003
1005 TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
1006 "Instruction shouldn't be supported if elements depend on VL");
1007
1009 MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) &&
1010 "All supported instructions produce a vector register result");
1011
1012 LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
1013 return true;
1014}
1015
1016/// Given a vslidedown.vx like:
1017///
1018/// %slideamt = ADDI %x, -1
1019/// %v = PseudoVSLIDEDOWN_VX %passthru, %src, %slideamt, avl=1
1020///
1021/// %v will only read the first %slideamt + 1 lanes of %src, which = %x.
1022/// This is a common case when lowering extractelement.
1023///
1024/// Note that if %x is 0, %slideamt will be all ones. In this case %src will be
1025/// completely slid down and none of its lanes will be read (since %slideamt is
1026/// greater than the largest VLMAX of 65536) so we can demand any minimum VL.
1027static std::optional<DemandedVL>
1029 const MachineRegisterInfo *MRI) {
1030 const MachineInstr &MI = *UserOp.getParent();
1031 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VSLIDEDOWN_VX)
1032 return std::nullopt;
1033 // We're looking at what lanes are used from the src operand.
1034 if (UserOp.getOperandNo() != 2)
1035 return std::nullopt;
1036 // For now, the AVL must be 1.
1037 const MachineOperand &AVL = MI.getOperand(4);
1038 if (!AVL.isImm() || AVL.getImm() != 1)
1039 return std::nullopt;
1040 // The slide amount must be %x - 1.
1041 const MachineOperand &SlideAmt = MI.getOperand(3);
1042 if (!SlideAmt.getReg().isVirtual())
1043 return std::nullopt;
1044 MachineInstr *SlideAmtDef = MRI->getUniqueVRegDef(SlideAmt.getReg());
1045 if (SlideAmtDef->getOpcode() != RISCV::ADDI ||
1046 SlideAmtDef->getOperand(2).getImm() != -AVL.getImm() ||
1047 !SlideAmtDef->getOperand(1).getReg().isVirtual())
1048 return std::nullopt;
1049 return SlideAmtDef->getOperand(1);
1050}
1051
1052DemandedVL
1053RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
1054 const MachineInstr &UserMI = *UserOp.getParent();
1055 const MCInstrDesc &Desc = UserMI.getDesc();
1056
1057 if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
1058 return DemandedVLs.lookup(&UserMI);
1059
1060 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1061 LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1062 " use VLMAX\n");
1063 return DemandedVL::vlmax();
1064 }
1065
1066 if (auto VL = getMinimumVLForVSLIDEDOWN_VX(UserOp, MRI))
1067 return *VL;
1068
1070 TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
1071 LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
1072 return DemandedVL::vlmax();
1073 }
1074
1075 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1076 const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1077 // Looking for an immediate or a register VL that isn't X0.
1078 assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1079 "Did not expect X0 VL");
1080
1081 // If the user is a passthru it will read the elements past VL, so
1082 // abort if any of the elements past VL are demanded.
1083 if (UserOp.isTied()) {
1084 assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
1086 if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
1087 LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1088 "instruction with demanded tail\n");
1089 return DemandedVL::vlmax();
1090 }
1091 }
1092
1093 // Instructions like reductions may use a vector register as a scalar
1094 // register. In this case, we should treat it as only reading the first lane.
1095 if (isVectorOpUsedAsScalarOp(UserOp)) {
1096 LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1097 return MachineOperand::CreateImm(1);
1098 }
1099
1100 // If we know the demanded VL of UserMI, then we can reduce the VL it
1101 // requires.
1102 if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
1103 return DemandedVLs.lookup(&UserMI);
1104
1105 return VLOp;
1106}
1107
1108/// Return true if MI is an instruction used for assembling registers
1109/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
1110/// Currently it's lowered to INSERT_SUBREG.
1112 if (!MI.isInsertSubreg())
1113 return false;
1114
1115 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1116 const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
1118 if (!RISCVRI::isVRegClass(DstRC->TSFlags))
1119 return false;
1120 unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
1121 if (NF < 2)
1122 return false;
1123
1124 // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
1125 auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
1126 unsigned SubRegIdx = MI.getOperand(3).getImm();
1127 [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
1128 assert(!IsFractional && "unexpected LMUL for tuple register classes");
1129 return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
1130}
1131
1133 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
1134 case VSSEG_CASES(8):
1135 case VSSSEG_CASES(8):
1136 case VSUXSEG_CASES(8):
1137 case VSOXSEG_CASES(8):
1138 case VSSEG_CASES(16):
1139 case VSSSEG_CASES(16):
1140 case VSUXSEG_CASES(16):
1141 case VSOXSEG_CASES(16):
1142 case VSSEG_CASES(32):
1143 case VSSSEG_CASES(32):
1144 case VSUXSEG_CASES(32):
1145 case VSOXSEG_CASES(32):
1146 case VSSEG_CASES(64):
1147 case VSSSEG_CASES(64):
1148 case VSUXSEG_CASES(64):
1149 case VSOXSEG_CASES(64):
1150 return true;
1151 default:
1152 return false;
1153 }
1154}
1155
1156bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
1157 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
1158 return true;
1159
1160 SmallSetVector<MachineOperand *, 8> OpWorklist;
1161 SmallPtrSet<const MachineInstr *, 4> PHISeen;
1162 for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
1163 OpWorklist.insert(&UserOp);
1164
1165 while (!OpWorklist.empty()) {
1166 MachineOperand &UserOp = *OpWorklist.pop_back_val();
1167 const MachineInstr &UserMI = *UserOp.getParent();
1168 LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1169
1170 if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) {
1171 LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
1173 MRI->use_operands(UserMI.getOperand(0).getReg())));
1174 continue;
1175 }
1176
1177 if (isTupleInsertInstr(UserMI)) {
1178 LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
1179 for (MachineOperand &UseOp :
1180 MRI->use_operands(UserMI.getOperand(0).getReg())) {
1181 const MachineInstr &CandidateMI = *UseOp.getParent();
1182 // We should not propagate the VL if the user is not a segmented store
1183 // or another INSERT_SUBREG, since VL just works differently
1184 // between segmented operations (per-field) v.s. other RVV ops (on the
1185 // whole register group).
1186 if (!isTupleInsertInstr(CandidateMI) &&
1187 !isSegmentedStoreInstr(CandidateMI))
1188 return false;
1189 OpWorklist.insert(&UseOp);
1190 }
1191 continue;
1192 }
1193
1194 if (UserMI.isPHI()) {
1195 // Don't follow PHI cycles
1196 if (!PHISeen.insert(&UserMI).second)
1197 continue;
1198 LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n");
1200 MRI->use_operands(UserMI.getOperand(0).getReg())));
1201 continue;
1202 }
1203
1204 if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1205 LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1206 return false;
1207 }
1208
1209 std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
1210 std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0));
1211 if (!ConsumerInfo || !ProducerInfo) {
1212 LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1213 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1214 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1215 return false;
1216 }
1217
1218 if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
1219 LLVM_DEBUG(
1220 dbgs()
1221 << " Abort due to incompatible information for EMUL or EEW.\n");
1222 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1223 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1224 return false;
1225 }
1226 }
1227
1228 return true;
1229}
1230
1231bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
1232 MachineOperand CommonVL) const {
1233 LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
1234
1235 unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
1236 MachineOperand &VLOp = MI.getOperand(VLOpNum);
1237
1238 assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
1239 "Expected VL to be an Imm or virtual Reg");
1240
1241 // If the VL is defined by a vleff that doesn't dominate MI, try using the
1242 // vleff's AVL. It will be greater than or equal to the output VL.
1243 if (CommonVL.isReg()) {
1244 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1245 if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
1246 !MDT->dominates(VLMI, &MI))
1247 CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
1248 }
1249
1250 if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
1251 LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
1252 return false;
1253 }
1254
1255 if (CommonVL.isIdenticalTo(VLOp)) {
1256 LLVM_DEBUG(
1257 dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
1258 return false;
1259 }
1260
1261 if (CommonVL.isImm()) {
1262 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1263 << CommonVL.getImm() << " for " << MI << "\n");
1264 VLOp.ChangeToImmediate(CommonVL.getImm());
1265 return true;
1266 }
1267 MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1268 auto VLDominates = [this, &VLMI](const MachineInstr &MI) {
1269 return MDT->dominates(VLMI, &MI);
1270 };
1271 if (!VLDominates(MI)) {
1272 assert(MI.getNumExplicitDefs() == 1);
1273 auto Uses = MRI->use_instructions(MI.getOperand(0).getReg());
1274 auto UsesSameBB = make_filter_range(Uses, [&MI](const MachineInstr &Use) {
1275 return Use.getParent() == MI.getParent();
1276 });
1277 if (VLMI->getParent() == MI.getParent() &&
1278 all_of(UsesSameBB, VLDominates) &&
1280 MI.moveBefore(VLMI->getNextNode());
1281 } else {
1282 LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
1283 return false;
1284 }
1285 }
1286 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1287 << printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
1288 << " for " << MI << "\n");
1289
1290 // All our checks passed. We can reduce VL.
1291 VLOp.ChangeToRegister(CommonVL.getReg(), false);
1292 MRI->constrainRegClass(CommonVL.getReg(), &RISCV::GPRNoX0RegClass);
1293 return true;
1294}
1295
1296static bool isPhysical(const MachineOperand &MO) {
1297 return MO.isReg() && MO.getReg().isPhysical();
1298}
1299
1300/// Look through \p MI's operands and propagate what it demands to its uses.
1301void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
1302 if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical))
1303 DemandedVLs[&MI] = DemandedVL::vlmax();
1304
1305 for (const MachineOperand &MO : virtual_vec_uses(MI)) {
1306 const MachineInstr *Def = MRI->getVRegDef(MO.getReg());
1307 DemandedVL Prev = DemandedVLs[Def];
1308 DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO));
1309 if (DemandedVLs[Def] != Prev)
1310 Worklist.insert(Def);
1311 }
1312}
1313
1314bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1315 if (skipFunction(MF.getFunction()))
1316 return false;
1317
1318 MRI = &MF.getRegInfo();
1319 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1320
1321 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1322 if (!ST.hasVInstructions())
1323 return false;
1324
1325 TII = ST.getInstrInfo();
1326
1327 assert(DemandedVLs.empty());
1328
1329 // For each instruction that defines a vector, propagate the VL it
1330 // uses to its inputs.
1331 for (MachineBasicBlock *MBB : post_order(&MF)) {
1333 for (MachineInstr &MI : reverse(*MBB))
1334 if (!MI.isDebugInstr())
1335 Worklist.insert(&MI);
1336 }
1337
1338 while (!Worklist.empty()) {
1339 const MachineInstr *MI = Worklist.front();
1340 Worklist.remove(MI);
1341 transfer(*MI);
1342 }
1343
1344 // Then go through and see if we can reduce the VL of any instructions to
1345 // only what's demanded.
1346 bool MadeChange = false;
1347 for (auto &[MI, VL] : DemandedVLs) {
1348 assert(MDT->isReachableFromEntry(MI->getParent()));
1349 if (!isCandidate(*MI))
1350 continue;
1351 if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
1352 continue;
1353 MadeChange = true;
1354 }
1355
1356 DemandedVLs.clear();
1357 return MadeChange;
1358}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static std::optional< DemandedVL > getMinimumVLForVSLIDEDOWN_VX(const MachineOperand &UserOp, const MachineRegisterInfo *MRI)
Given a vslidedown.vx like:
static unsigned getIntegerExtensionOperandEEW(unsigned Factor, const MachineInstr &MI, const MachineOperand &MO)
Dest has EEW=SEW.
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define VSOXSEG_CASES(EEW)
static bool isSegmentedStoreInstr(const MachineInstr &MI)
static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO)
Return true if MO is a vector operand but is used as a scalar operand.
static std::optional< unsigned > getOperandLog2EEW(const MachineOperand &MO)
static std::pair< unsigned, bool > getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI)
Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and SEW are from the TSFlags o...
#define VSUXSEG_CASES(EEW)
static bool isPhysical(const MachineOperand &MO)
#define VSSSEG_CASES(EEW)
#define VSSEG_CASES(EEW)
static bool isTupleInsertInstr(const MachineInstr &MI)
Return true if MI is an instruction used for assembling registers for segmented store instructions,...
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
bool isReachableFromEntry(const NodeT *A) const
isReachableFromEntry - Return true if A is dominated by the entry block of the function containing it...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Describe properties that are true of each instruction in the target description file.
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To)
Return true if moving From down to To won't cause any physical register reads or writes to be clobber...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A vector that has set insertion semantics.
Definition SetVector.h:57
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
const uint8_t TSFlags
Configurable target specific flags.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
static bool readsPastVL(uint64_t TSFlags)
static bool isTiedPseudo(uint64_t TSFlags)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool elementsDependOnVL(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
static unsigned getNF(uint8_t TSFlags)
static bool isVRegClass(uint8_t TSFlags)
static RISCVVType::VLMUL getLMul(uint8_t TSFlags)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2128
FunctionPass * createRISCVVLOptimizerPass()
iterator_range< po_iterator< T > > post_order(const T &G)
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.