LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
365 }
366 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
374 VT, Expand);
382 }
384 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
385 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
386 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
388
389 for (MVT VT :
390 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
391 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
401 }
404 // We want to legalize this to an f64 load rather than an i64 load.
405 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
406 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
408 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
409 MVT::v16i64})
411 }
412
413 // Set operations for 'LASX' feature.
414
415 if (Subtarget.hasExtLASX()) {
416 for (MVT VT : LASXVTs) {
420
426
430 }
431 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
434 Legal);
436 VT, Legal);
443 Expand);
459 }
460 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
462 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
464 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
467 }
468 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
476 VT, Expand);
484 }
487 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
490 }
491 for (MVT VT :
492 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
495 }
496 }
497
498 // Set DAG combine for LA32 and LA64.
499 if (Subtarget.hasBasicF()) {
501 }
502
507
508 // Set DAG combine for 'LSX' feature.
509
510 if (Subtarget.hasExtLSX()) {
515 }
516
517 // Set DAG combine for 'LASX' feature.
518 if (Subtarget.hasExtLASX()) {
523 }
524
525 // Compute derived properties from the register classes.
526 computeRegisterProperties(Subtarget.getRegisterInfo());
527
529
532
533 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
534
536
537 // Function alignments.
539 // Set preferred alignments.
540 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
541 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
542 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
543
544 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
545 if (Subtarget.hasLAMCAS())
547
548 if (Subtarget.hasSCQ()) {
551 }
552
553 // Disable strict node mutation.
554 IsStrictFPEnabled = true;
555}
556
558 const GlobalAddressSDNode *GA) const {
559 // In order to maximise the opportunity for common subexpression elimination,
560 // keep a separate ADD node for the global address offset instead of folding
561 // it in the global address node. Later peephole optimisations may choose to
562 // fold it back in when profitable.
563 return false;
564}
565
567 SelectionDAG &DAG) const {
568 switch (Op.getOpcode()) {
570 return lowerATOMIC_FENCE(Op, DAG);
572 return lowerEH_DWARF_CFA(Op, DAG);
574 return lowerGlobalAddress(Op, DAG);
576 return lowerGlobalTLSAddress(Op, DAG);
578 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
580 return lowerINTRINSIC_W_CHAIN(Op, DAG);
582 return lowerINTRINSIC_VOID(Op, DAG);
584 return lowerBlockAddress(Op, DAG);
585 case ISD::JumpTable:
586 return lowerJumpTable(Op, DAG);
587 case ISD::SHL_PARTS:
588 return lowerShiftLeftParts(Op, DAG);
589 case ISD::SRA_PARTS:
590 return lowerShiftRightParts(Op, DAG, true);
591 case ISD::SRL_PARTS:
592 return lowerShiftRightParts(Op, DAG, false);
594 return lowerConstantPool(Op, DAG);
595 case ISD::FP_TO_SINT:
596 return lowerFP_TO_SINT(Op, DAG);
597 case ISD::BITCAST:
598 return lowerBITCAST(Op, DAG);
599 case ISD::UINT_TO_FP:
600 return lowerUINT_TO_FP(Op, DAG);
601 case ISD::SINT_TO_FP:
602 return lowerSINT_TO_FP(Op, DAG);
603 case ISD::VASTART:
604 return lowerVASTART(Op, DAG);
605 case ISD::FRAMEADDR:
606 return lowerFRAMEADDR(Op, DAG);
607 case ISD::RETURNADDR:
608 return lowerRETURNADDR(Op, DAG);
610 return lowerWRITE_REGISTER(Op, DAG);
612 return lowerINSERT_VECTOR_ELT(Op, DAG);
614 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
616 return lowerBUILD_VECTOR(Op, DAG);
618 return lowerCONCAT_VECTORS(Op, DAG);
620 return lowerVECTOR_SHUFFLE(Op, DAG);
621 case ISD::BITREVERSE:
622 return lowerBITREVERSE(Op, DAG);
624 return lowerSCALAR_TO_VECTOR(Op, DAG);
625 case ISD::PREFETCH:
626 return lowerPREFETCH(Op, DAG);
627 case ISD::SELECT:
628 return lowerSELECT(Op, DAG);
629 case ISD::BRCOND:
630 return lowerBRCOND(Op, DAG);
631 case ISD::FP_TO_FP16:
632 return lowerFP_TO_FP16(Op, DAG);
633 case ISD::FP16_TO_FP:
634 return lowerFP16_TO_FP(Op, DAG);
635 case ISD::FP_TO_BF16:
636 return lowerFP_TO_BF16(Op, DAG);
637 case ISD::BF16_TO_FP:
638 return lowerBF16_TO_FP(Op, DAG);
640 return lowerVECREDUCE_ADD(Op, DAG);
641 case ISD::ROTL:
642 case ISD::ROTR:
643 return lowerRotate(Op, DAG);
651 return lowerVECREDUCE(Op, DAG);
652 case ISD::ConstantFP:
653 return lowerConstantFP(Op, DAG);
654 case ISD::SETCC:
655 return lowerSETCC(Op, DAG);
656 case ISD::FP_ROUND:
657 return lowerFP_ROUND(Op, DAG);
658 case ISD::FP_EXTEND:
659 return lowerFP_EXTEND(Op, DAG);
661 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
663 return lowerDYNAMIC_STACKALLOC(Op, DAG);
664 }
665 return SDValue();
666}
667
668// Helper to attempt to return a cheaper, bit-inverted version of \p V.
670 // TODO: don't always ignore oneuse constraints.
671 V = peekThroughBitcasts(V);
672 EVT VT = V.getValueType();
673
674 // Match not(xor X, -1) -> X.
675 if (V.getOpcode() == ISD::XOR &&
676 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
677 isAllOnesConstant(V.getOperand(1))))
678 return V.getOperand(0);
679
680 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
681 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
682 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
683 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
684 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
685 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
686 V.getOperand(1));
687 }
688 }
689
690 // Match not(SplatVector(not(X)) -> SplatVector(X).
691 if (V.getOpcode() == ISD::BUILD_VECTOR) {
692 if (SDValue SplatValue =
693 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
694 if (!V->isOnlyUserOf(SplatValue.getNode()))
695 return SDValue();
696
697 if (SDValue Not = isNOT(SplatValue, DAG)) {
698 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
699 return DAG.getSplat(VT, SDLoc(Not), Not);
700 }
701 }
702 }
703
704 // Match not(or(not(X),not(Y))) -> and(X, Y).
705 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
706 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
707 // TODO: Handle cases with single NOT operand -> VANDN
708 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
709 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
710 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
711 DAG.getBitcast(VT, Op1));
712 }
713
714 // TODO: Add more matching patterns. Such as,
715 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
716 // not(slt(C, X)) -> slt(X - 1, C)
717 return SDValue();
718}
719
720// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
721// LoongArchISD::VFCVT. For example:
722// x1 = fp_round x, 0
723// y1 = fp_round y, 0
724// z = concat_vectors x1, y1
725// Or
726// x1 = LoongArch::VFCVT undef, x
727// y1 = LoongArch::VFCVT undef, y
728// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
729// can be combined to:
730// z = LoongArch::VFCVT y, x
732 const LoongArchSubtarget &Subtarget) {
733 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
734 (N->getOpcode() == LoongArchISD::VPACKEV) ||
735 (N->getOpcode() == LoongArchISD::VPERMI)) &&
736 "Invalid Node");
737
738 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
739 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
740 unsigned Opcode0 = Op0.getOpcode();
741 unsigned Opcode1 = Op1.getOpcode();
742 if (Opcode0 != Opcode1)
743 return SDValue();
744
745 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
746 return SDValue();
747
748 // Check if two nodes have only one use.
749 if (!Op0.hasOneUse() || !Op1.hasOneUse())
750 return SDValue();
751
752 EVT VT = N.getValueType();
753 EVT SVT0 = Op0.getValueType();
754 EVT SVT1 = Op1.getValueType();
755 // Check if two nodes have the same result type.
756 if (SVT0 != SVT1)
757 return SDValue();
758
759 // Check if two nodes have the same operand type.
760 EVT SSVT0 = Op0.getOperand(0).getValueType();
761 EVT SSVT1 = Op1.getOperand(0).getValueType();
762 if (SSVT0 != SSVT1)
763 return SDValue();
764
765 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
766 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
767 SSVT0 == MVT::v4f64) {
768 // A vector_shuffle is required in the final step, as the xvfcvt
769 // instruction operates on each 128-bit segment as a lane.
770 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
771 Op1.getOperand(0), Op0.getOperand(0));
772 SDValue Undef = DAG.getUNDEF(Res.getValueType());
773 // After VFCVT, the high part of Res comes from the high parts of Op0 and
774 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
775 // the desired order requires Op0 to fully occupy the lower half and Op1
776 // the upper half of Res. The Mask reorders the elements of Res to achieve
777 // this:
778 // - The first four elements (0, 1, 4, 5) come from Op0.
779 // - The next four elements (2, 3, 6, 7) come from Op1.
780 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
781 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
782 return DAG.getBitcast(VT, Res);
783 }
784 }
785
786 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
787 N->getOpcode() == LoongArchISD::VPERMI) &&
788 Opcode0 == LoongArchISD::VFCVT) {
789 // For VPACKEV or VPERMI, check if the first operand of VFCVT is undef.
790 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
791 return SDValue();
792
793 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
794 return SDValue();
795
796 if (N->getOpcode() == LoongArchISD::VPACKEV &&
797 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
798 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
799 Op0.getOperand(1), Op1.getOperand(1));
800 return DAG.getBitcast(VT, Res);
801 }
802
803 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
804 int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
805 if (Imm != 68)
806 return SDValue();
807 return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
808 Op1.getOperand(1));
809 }
810 }
811
812 return SDValue();
813}
814
815SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
816 SelectionDAG &DAG) const {
817 SDLoc DL(Op);
818 SDValue In = Op.getOperand(0);
819 MVT VT = Op.getSimpleValueType();
820 MVT SVT = In.getSimpleValueType();
821
822 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
823 SDValue Lo, Hi;
824 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
825 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
826 }
827
828 return SDValue();
829}
830
// Custom lowering for a vector ISD::FP_EXTEND.
//
// Two conversions are handled: v2f32 -> v2f64 (only when LSX is available)
// and v4f32 -> v4f64 (only when LASX is available). Anything else returns an
// empty SDValue. The source is presented as a vector with twice as many
// elements (WideOpVT) and converted with LoongArchISD::VFCVTH when the source
// is the upper half of such a vector, or LoongArchISD::VFCVTL otherwise.
831SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
832 SelectionDAG &DAG) const {
833
834 SDLoc DL(Op);
835 EVT VT = Op.getValueType();
836 SDValue Src = Op->getOperand(0);
837 EVT SVT = Src.getValueType();
838
// Classify the requested conversion; each form also requires the matching
// vector extension on the subtarget.
839 bool V2F32ToV2F64 =
840 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
841 bool V4F32ToV4F64 =
842 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
843 if (!V2F32ToV2F64 && !V4F32ToV4F64)
844 return SDValue();
845
846 // Check if Op is the high part of vector.
// Returns the vector Op was extracted from when Op is an EXTRACT_SUBVECTOR
// whose index equals half of that vector's element count; returns an empty
// SDValue in every other case.
// NOTE(review): embedded listing line 848 is absent from this text, so the
// lambda may do additional work on Op before the opcode test -- check the
// original file.
847 auto CheckVecHighPart = [](SDValue Op) {
849 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
850 SDValue SOp = Op.getOperand(0);
851 EVT SVT = SOp.getValueType();
// An odd element count has no exact "upper half".
852 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
853 return SDValue();
854
855 const uint64_t Imm = Op.getConstantOperandVal(1);
856 if (Imm == SVT.getVectorNumElements() / 2)
857 return SOp;
858 return SDValue();
859 }
860 return SDValue();
861 };
862
863 unsigned Opcode;
864 SDValue VFCVTOp;
// WideOpVT has the source element type with double the element count.
865 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
866 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
867
868 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
869 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
870 if (SDValue V = CheckVecHighPart(Src)) {
871 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
872 "Unexpected wide vector");
873 Opcode = LoongArchISD::VFCVTH;
874 VFCVTOp = DAG.getBitcast(WideOpVT, V);
875 } else {
// Place Src at element 0 of an otherwise-undef wide vector.
876 Opcode = LoongArchISD::VFCVTL;
877 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
878 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
879 }
880
881 // v2f64 = fp_extend v2f32
882 if (V2F32ToV2F64)
883 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
884
885 // v4f64 = fp_extend v4f32
886 if (V4F32ToV4F64) {
887 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
888 // vector_shuffle is required firstly.
889 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
890 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
891 DAG.getUNDEF(WideOpVT), Mask);
892 Res = DAG.getNode(Opcode, DL, VT, Res);
893 return Res;
894 }
895
// Not reached in practice: one of the two flags above is always set once the
// early return at the top has been passed.
896 return SDValue();
897}
898
// Custom lowering for an f32/f64 ISD::ConstantFP.
//
// When it goes ahead, the constant's bit pattern is built as an integer
// constant and moved into a floating-point register with one of the
// LoongArchISD::MOVGR2FR_* nodes. An empty SDValue is returned instead when
// the value is +/-0.0, when isFPImmVLDILegal accepts it, or when the
// estimated instruction count exceeds MaterializeFPImmInsNum (the last check
// is skipped for exactly +1.0).
899SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
900 SelectionDAG &DAG) const {
901 EVT VT = Op.getValueType();
902 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
903 const APFloat &FPVal = CFP->getValueAPF();
904 SDLoc DL(CFP);
905
// Only f32 with the basic F feature or f64 with the basic D feature is
// expected here.
906 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
907 (VT == MVT::f64 && Subtarget.hasBasicD()));
908
909 // If value is 0.0 or -0.0, just ignore it.
910 if (FPVal.isZero())
911 return SDValue();
912
913 // If lsx enabled, use cheaper 'vldi' instruction if possible.
914 if (isFPImmVLDILegal(FPVal, VT))
915 return SDValue();
916
917 // Construct as integer, and move to float register.
918 APInt INTVal = FPVal.bitcastToAPInt();
919
920 // If more than MaterializeFPImmInsNum instructions will be used to
921 // generate the INTVal and move it to float register, fallback to
922 // use floating point load from the constant pool.
// NOTE(review): `Seq` is used below but its definition (embedded listing
// line 923) is absent from this text -- restore it from the original file.
// The move into the FPR is counted as 2 instructions for f64 on a non-64-bit
// subtarget and as 1 instruction otherwise.
924 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
925 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
926 return SDValue();
927
928 switch (VT.getSimpleVT().SimpleTy) {
929 default:
930 llvm_unreachable("Unexpected floating point type!");
931 break;
932 case MVT::f32: {
// On a 64-bit subtarget the i32 bit pattern is any-extended to i64 and the
// _LA64 variant of the move node is used.
933 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
934 if (Subtarget.is64Bit())
935 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
936 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
937 : LoongArchISD::MOVGR2FR_W,
938 DL, VT, NewVal);
939 }
940 case MVT::f64: {
941 if (Subtarget.is64Bit()) {
942 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
943 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
944 }
// Non-64-bit subtarget: pass the low and high 32-bit halves of the pattern
// as two separate i32 constants.
945 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
946 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
947 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
948 }
949 }
950
// Every switch case above returns or is marked unreachable.
951 return SDValue();
952}
953
954// Ensure SETCC result and operand have the same bit width; isel does not
955// support mismatched widths.
956SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
957 SelectionDAG &DAG) const {
958 SDLoc DL(Op);
959 EVT ResultVT = Op.getValueType();
960 EVT OperandVT = Op.getOperand(0).getValueType();
961
962 EVT SetCCResultVT =
963 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
964
965 if (ResultVT == SetCCResultVT)
966 return Op;
967
968 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
969 "SETCC operands must have the same type!");
970
971 SDValue SetCCNode =
972 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
973 Op.getOperand(1), Op.getOperand(2));
974
975 if (ResultVT.bitsGT(SetCCResultVT))
976 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
977 else if (ResultVT.bitsLT(SetCCResultVT))
978 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
979
980 return SetCCNode;
981}
982
983// Lower sext_invec using vslti instructions.
984// For example:
985// %b = sext <4 x i16> %a to <4 x i32>
986// can be lowered to:
987// VSLTI_H vr2, vr1, 0
988// VILVL.H vr1, vr2, vr1
989SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
990 SDValue Op, SelectionDAG &DAG) const {
991 SDLoc DL(Op);
992 SDValue Src = Op.getOperand(0);
993 MVT SrcVT = Src.getSimpleValueType();
994 MVT DstVT = Op.getSimpleValueType();
995
996 if (!SrcVT.is128BitVector())
997 return SDValue();
998
999 // lower to VSLTI + VILVL if extend could be done in single step.
1000 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1001 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1002 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Src, Zero,
1003 DAG.getCondCode(ISD::SETLT));
1004 SDValue LoInterleaved =
1005 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Src);
1006
1007 return DAG.getBitcast(DstVT, LoInterleaved);
1008 }
1009
1010 return SDValue();
1011}
1012
1013// Lower vecreduce_add using vhaddw instructions.
1014// For Example:
1015// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1016// can be lowered to:
1017// VHADDW_D_W vr0, vr0, vr0
1018// VHADDW_Q_D vr0, vr0, vr0
1019// VPICKVE2GR_D a0, vr0, 0
1020// ADDI_W a0, a0, 0
1021SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1022 SelectionDAG &DAG) const {
1023
1024 SDLoc DL(Op);
1025 MVT OpVT = Op.getSimpleValueType();
1026 SDValue Val = Op.getOperand(0);
1027
1028 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1029 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1030 unsigned ResBits = OpVT.getScalarSizeInBits();
1031
1032 unsigned LegalVecSize = 128;
1033 bool isLASX256Vector =
1034 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1035
1036 // Ensure operand type legal or enable it legal.
1037 while (!isTypeLegal(Val.getSimpleValueType())) {
1038 Val = DAG.WidenVector(Val, DL);
1039 }
1040
1041 // NumEles is designed for iterations count, v4i32 for LSX
1042 // and v8i32 for LASX should have the same count.
1043 if (isLASX256Vector) {
1044 NumEles /= 2;
1045 LegalVecSize = 256;
1046 }
1047
1048 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1049 MVT IntTy = MVT::getIntegerVT(EleBits);
1050 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
1051 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
1052 }
1053
1054 if (isLASX256Vector) {
1055 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
1056 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
1057 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
1058 }
1059
1060 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
1061 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1062 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
1063}
1064
1065// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1066// For Example:
1067// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1068// can be lowered to:
1069// VBSRL_V vr1, vr0, 8
1070// VMAX_W vr0, vr1, vr0
1071// VBSRL_V vr1, vr0, 4
1072// VMAX_W vr0, vr1, vr0
1073// VPICKVE2GR_W a0, vr0, 0
1074// For 256 bit vector, it is illegal and will be spilt into
1075// two 128 bit vector by default then processed by this.
1076SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1077 SelectionDAG &DAG) const {
1078 SDLoc DL(Op);
1079
1080 MVT OpVT = Op.getSimpleValueType();
1081 SDValue Val = Op.getOperand(0);
1082
1083 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1084 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1085
1086 // Ensure operand type legal or enable it legal.
1087 while (!isTypeLegal(Val.getSimpleValueType())) {
1088 Val = DAG.WidenVector(Val, DL);
1089 }
1090
1091 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1092 MVT VecTy = Val.getSimpleValueType();
1093 MVT GRLenVT = Subtarget.getGRLenVT();
1094
1095 for (int i = NumEles; i > 1; i /= 2) {
1096 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1097 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1098 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1099 }
1100
1101 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1102 DAG.getConstant(0, DL, GRLenVT));
1103}
1104
1105SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1106 SelectionDAG &DAG) const {
1107 unsigned IsData = Op.getConstantOperandVal(4);
1108
1109 // We don't support non-data prefetch.
1110 // Just preserve the chain.
1111 if (!IsData)
1112 return Op.getOperand(0);
1113
1114 return Op;
1115}
1116
1117SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1118 SelectionDAG &DAG) const {
1119 MVT VT = Op.getSimpleValueType();
1120 assert(VT.isVector() && "Unexpected type");
1121
1122 SDLoc DL(Op);
1123 SDValue R = Op.getOperand(0);
1124 SDValue Amt = Op.getOperand(1);
1125 unsigned Opcode = Op.getOpcode();
1126 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1127
1128 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1129 if (V.getOpcode() != ISD::BUILD_VECTOR)
1130 return false;
1131 if (SDValue SplatValue =
1132 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1133 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1134 CstSplatValue = C->getAPIntValue();
1135 return true;
1136 }
1137 }
1138 return false;
1139 };
1140
1141 // Check for constant splat rotation amount.
1142 APInt CstSplatValue;
1143 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1144 bool isROTL = Opcode == ISD::ROTL;
1145
1146 // Check for splat rotate by zero.
1147 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1148 return R;
1149
1150 // LoongArch targets always prefer ISD::ROTR.
1151 if (isROTL) {
1152 SDValue Zero = DAG.getConstant(0, DL, VT);
1153 return DAG.getNode(ISD::ROTR, DL, VT, R,
1154 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1155 }
1156
1157 // Rotate by a immediate.
1158 if (IsCstSplat) {
1159 // ISD::ROTR: Attemp to rotate by a positive immediate.
1160 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1161 if (SDValue Urem =
1162 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1163 return DAG.getNode(Opcode, DL, VT, R, Urem);
1164 }
1165
1166 return Op;
1167}
1168
1169// Return true if Val is equal to (setcc LHS, RHS, CC).
1170// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1171// Otherwise, return std::nullopt.
// Val must itself be an ISD::SETCC node (asserted below). Its operands are
// compared against LHS/RHS both in the given order and in swapped order.
1172static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1173 ISD::CondCode CC, SDValue Val) {
1174 assert(Val->getOpcode() == ISD::SETCC);
1175 SDValue LHS2 = Val.getOperand(0);
1176 SDValue RHS2 = Val.getOperand(1);
1177 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1178
// Same operand order: compare the condition codes directly, then against
// the inverse of Val's condition.
1179 if (LHS == LHS2 && RHS == RHS2) {
1180 if (CC == CC2)
1181 return true;
1182 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1183 return false;
1184 } else if (LHS == RHS2 && RHS == LHS2) {
// NOTE(review): embedded listing line 1185 is absent from this text.
// Presumably it adjusts CC2 for the swapped operand order before the
// comparisons below -- restore it from the original file.
1186 if (CC == CC2)
1187 return true;
1188 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1189 return false;
1190 }
1191
// Operands do not line up, or the conditions are neither equal nor inverse.
1192 return std::nullopt;
1193}
1194
1196 const LoongArchSubtarget &Subtarget) {
1197 SDValue CondV = N->getOperand(0);
1198 SDValue TrueV = N->getOperand(1);
1199 SDValue FalseV = N->getOperand(2);
1200 MVT VT = N->getSimpleValueType(0);
1201 SDLoc DL(N);
1202
1203 // (select c, -1, y) -> -c | y
1204 if (isAllOnesConstant(TrueV)) {
1205 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1206 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1207 }
1208 // (select c, y, -1) -> (c-1) | y
1209 if (isAllOnesConstant(FalseV)) {
1210 SDValue Neg =
1211 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1212 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1213 }
1214
1215 // (select c, 0, y) -> (c-1) & y
1216 if (isNullConstant(TrueV)) {
1217 SDValue Neg =
1218 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1219 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1220 }
1221 // (select c, y, 0) -> -c & y
1222 if (isNullConstant(FalseV)) {
1223 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1224 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1225 }
1226
1227 // select c, ~x, x --> xor -c, x
1228 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1229 const APInt &TrueVal = TrueV->getAsAPIntVal();
1230 const APInt &FalseVal = FalseV->getAsAPIntVal();
1231 if (~TrueVal == FalseVal) {
1232 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1233 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1234 }
1235 }
1236
1237 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1238 // when both truev and falsev are also setcc.
1239 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1240 FalseV.getOpcode() == ISD::SETCC) {
1241 SDValue LHS = CondV.getOperand(0);
1242 SDValue RHS = CondV.getOperand(1);
1243 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1244
1245 // (select x, x, y) -> x | y
1246 // (select !x, x, y) -> x & y
1247 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1248 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1249 DAG.getFreeze(FalseV));
1250 }
1251 // (select x, y, x) -> x & y
1252 // (select !x, y, x) -> x | y
1253 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1254 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1255 DAG.getFreeze(TrueV), FalseV);
1256 }
1257 }
1258
1259 return SDValue();
1260}
1261
1262 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1263 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1264 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1265 // being `0` or `-1`. In such cases we can replace `select` with `and`.
1266 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1267 // than `c0`?
//
// Returns the replacement select, or an empty SDValue when the pattern does
// not match or the folded constant is neither 0 nor all-ones.
// NOTE(review): this listing skips source line 1269, which should carry the
// function name and its leading parameters (the body uses `BO` and `DAG`).
1268 static SDValue
1270 const LoongArchSubtarget &Subtarget) {
// Locate the select among the binop's two operands; it must have one use.
1271 unsigned SelOpNo = 0;
1272 SDValue Sel = BO->getOperand(0);
1273 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1274 SelOpNo = 1;
1275 Sel = BO->getOperand(1);
1276 }
1277
1278 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1279 return SDValue();
1280
// Pick which select arm (operand 1 = true, operand 2 = false) is the constant.
1281 unsigned ConstSelOpNo = 1;
1282 unsigned OtherSelOpNo = 2;
1283 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1284 ConstSelOpNo = 2;
1285 OtherSelOpNo = 1;
1286 }
1287 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1288 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1289 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1290 return SDValue();
1291
// The binop operand that is not the select must be a non-opaque constant.
1292 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1293 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1294 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1295 return SDValue();
1296
1297 SDLoc DL(Sel);
1298 EVT VT = BO->getValueType(0);
1299
// Keep the original operand order when folding: if the select was operand 1,
// the binop constant goes first.
1300 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1301 if (SelOpNo == 1)
1302 std::swap(NewConstOps[0], NewConstOps[1]);
1303
1304 SDValue NewConstOp =
1305 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1306 if (!NewConstOp)
1307 return SDValue();
1308
// Profitability check: only a folded value of 0 or -1 is accepted.
1309 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1310 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1311 return SDValue();
1312
// Apply the binop to the non-constant arm, again preserving operand order.
1313 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1314 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1315 if (SelOpNo == 1)
1316 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1317 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1318
// Put each new value back in the arm (true/false) its source occupied.
1319 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1320 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1321 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1322}
1323
1324 // Changes the condition code and swaps operands if necessary, so the SetCC
1325 // operation matches one of the comparisons supported directly by branches
1326 // in the LoongArch ISA. May adjust compares to favor compare with 0 over
1327 // compare with 1/-1.
//
// LHS, RHS and CC are all read and overwritten in the body.
// NOTE(review): this listing skips source line 1328, which should carry the
// function name and the leading parameters (DL, LHS, RHS are used below).
1329 ISD::CondCode &CC, SelectionDAG &DAG) {
1330 // If this is a single bit test that can't be handled by ANDI, shift the
1331 // bit to be tested to the MSB and perform a signed compare with 0.
1332 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1333 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1334 isa<ConstantSDNode>(LHS.getOperand(1))) {
1335 uint64_t Mask = LHS.getConstantOperandVal(1);
1336 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1337 unsigned ShAmt = 0;
1338 if (isPowerOf2_64(Mask)) {
// Single bit: SETEQ becomes SETGE and any other accepted CC becomes SETLT,
// i.e. a sign test once the bit has been shifted into the MSB.
1339 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1340 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1341 } else {
// Low-bit mask: shift so the highest mask bit lands in the MSB; CC is kept.
1342 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1343 }
1344
// Drop the AND and compare the (optionally shifted) source value instead.
1345 LHS = LHS.getOperand(0);
1346 if (ShAmt != 0)
1347 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1348 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1349 return;
1350 }
1351 }
1352
1353 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1354 int64_t C = RHSC->getSExtValue();
1355 switch (CC) {
1356 default:
1357 break;
1358 case ISD::SETGT:
1359 // Convert X > -1 to X >= 0.
1360 if (C == -1) {
1361 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1362 CC = ISD::SETGE;
1363 return;
1364 }
1365 break;
1366 case ISD::SETLT:
1367 // Convert X < 1 to 0 >= X.
1368 if (C == 1) {
1369 RHS = LHS;
1370 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1371 CC = ISD::SETGE;
1372 return;
1373 }
1374 break;
1375 }
1376 }
1377
// For the four condition codes below the operands are exchanged.
// NOTE(review): this listing skips source line 1385, between the case labels
// and the swap; presumably it also swaps CC to match -- confirm against the
// full source.
1378 switch (CC) {
1379 default:
1380 break;
1381 case ISD::SETGT:
1382 case ISD::SETLE:
1383 case ISD::SETUGT:
1384 case ISD::SETULE:
1386 std::swap(LHS, RHS);
1387 break;
1388 }
1389}
1390
// Lower a select node. The steps, in order:
//   1. try combineSelectToBinOp on the select itself;
//   2. if the select's only user is a speculatable binop, try
//      foldBinOpIntoSelectIfProfitable on that user;
//   3. otherwise emit a LoongArchISD::SELECT_CC, merging the SETCC condition
//      into it when that SETCC compares GRLenVT values.
1391SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1392 SelectionDAG &DAG) const {
1393 SDValue CondV = Op.getOperand(0);
1394 SDValue TrueV = Op.getOperand(1);
1395 SDValue FalseV = Op.getOperand(2);
1396 SDLoc DL(Op);
1397 MVT VT = Op.getSimpleValueType();
1398 MVT GRLenVT = Subtarget.getGRLenVT();
1399
1400 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1401 return V;
1402
1403 if (Op.hasOneUse()) {
1404 unsigned UseOpc = Op->user_begin()->getOpcode();
1405 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1406 SDNode *BinOp = *Op->user_begin();
1407 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1408 DAG, Subtarget)) {
// The fold succeeded: redirect every use of the binop to the new value.
1409 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1410 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1411 // may return a constant node and cause crash in lowerSELECT.
1412 if (NewSel.getOpcode() == ISD::SELECT)
1413 return lowerSELECT(NewSel, DAG);
1414 return NewSel;
1415 }
1416 }
1417 }
1418
1419 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1420 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1421 // (select condv, truev, falsev)
1422 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1423 if (CondV.getOpcode() != ISD::SETCC ||
1424 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1425 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1426 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1427
1428 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1429
1430 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1431 }
1432
1433 // If the CondV is the output of a SETCC node which operates on GRLenVT
1434 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1435 // to take advantage of the integer compare+branch instructions. i.e.: (select
1436 // (setcc lhs, rhs, cc), truev, falsev)
1437 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1438 SDValue LHS = CondV.getOperand(0);
1439 SDValue RHS = CondV.getOperand(1);
1440 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1441
1442 // Special case for a select of 2 constants that have a difference of 1.
1443 // Normally this is done by DAGCombine, but if the select is introduced by
1444 // type legalization or op legalization, we miss it. Restricting to SETLT
1445 // case for now because that is what signed saturating add/sub need.
1446 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1447 // but we would probably want to swap the true/false values if the condition
1448 // is SETGE/SETLE to avoid an XORI.
// NOTE(review): the ADD/SUB forms below use CondV as an arithmetic operand,
// which presumes the SETCC result is exactly 0 or 1 -- confirm.
1449 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1450 CCVal == ISD::SETLT) {
1451 const APInt &TrueVal = TrueV->getAsAPIntVal();
1452 const APInt &FalseVal = FalseV->getAsAPIntVal();
1453 if (TrueVal - 1 == FalseVal)
1454 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1455 if (TrueVal + 1 == FalseVal)
1456 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1457 }
1458
// May rewrite LHS, RHS and CCVal in place.
1459 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1460 // 1 < x ? x : 1 -> 0 < x ? x : 1
1461 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1462 RHS == TrueV && LHS == FalseV) {
1463 LHS = DAG.getConstant(0, DL, VT);
1464 // 0 <u x is the same as x != 0.
1465 if (CCVal == ISD::SETULT) {
1466 std::swap(LHS, RHS);
1467 CCVal = ISD::SETNE;
1468 }
1469 }
1470
1471 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1472 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1473 RHS == FalseV) {
1474 RHS = DAG.getConstant(0, DL, VT);
1475 }
1476
1477 SDValue TargetCC = DAG.getCondCode(CCVal);
1478
1479 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1480 // (select (setcc lhs, rhs, CC), constant, falsev)
1481 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1482 std::swap(TrueV, FalseV);
1483 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1484 }
1485
1486 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1487 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1488}
1489
// Lower a conditional branch. Operand 0 is passed through as the first
// operand of the new node, operand 1 is the condition and operand 2 is passed
// through as the last operand.
//   - SETCC on GRLenVT values: fold the compare into LoongArchISD::BR_CC.
//   - SETCC on floating-point values: emit LoongArchISD::BRCOND on the SETCC.
//   - anything else: emit BR_CC testing `cond != 0`.
1490SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1491 SelectionDAG &DAG) const {
1492 SDValue CondV = Op.getOperand(1);
1493 SDLoc DL(Op);
1494 MVT GRLenVT = Subtarget.getGRLenVT();
1495
1496 if (CondV.getOpcode() == ISD::SETCC) {
1497 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1498 SDValue LHS = CondV.getOperand(0);
1499 SDValue RHS = CondV.getOperand(1);
1500 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1501
// May rewrite LHS, RHS and CCVal in place.
1502 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1503
1504 SDValue TargetCC = DAG.getCondCode(CCVal);
1505 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1506 Op.getOperand(0), LHS, RHS, TargetCC,
1507 Op.getOperand(2));
1508 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1509 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1510 Op.getOperand(0), CondV, Op.getOperand(2));
1511 }
1512 }
1513
// Fallback, also reached for a SETCC whose operands are neither GRLenVT nor
// floating point: branch if the condition value is non-zero.
1514 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1515 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1516 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1517}
1518
// Lower SCALAR_TO_VECTOR by inserting the scalar operand at index 0 of an
// undef vector of the result type; the other elements stay undefined.
1519SDValue
1520LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1521 SelectionDAG &DAG) const {
1522 SDLoc DL(Op);
1523 MVT OpVT = Op.getSimpleValueType();
1524
1525 SDValue Vector = DAG.getUNDEF(OpVT);
1526 SDValue Val = Op.getOperand(0);
1527 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1528
1529 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1530}
1531
// Lower a vector BITREVERSE by reinterpreting the source as a vector of i64,
// reversing each i64 chunk, and (for 16/32-bit elements) restoring the element
// order inside each chunk with a shuffle. Returns an empty SDValue for byte
// vectors on non-64-bit subtargets and for result types not listed in the
// final switch.
// NOTE(review): this listing skips source lines 1548 and 1570, presumably the
// declarations of `Ops` and `Mask` used below -- confirm against the full
// source.
1532SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1533 SelectionDAG &DAG) const {
1534 EVT ResTy = Op->getValueType(0);
1535 SDValue Src = Op->getOperand(0);
1536 SDLoc DL(Op);
1537
1538 // LoongArchISD::BITREV_8B is not supported on LA32.
1539 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1540 return SDValue();
1541
1542 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1543 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1544 unsigned int NewEltNum = NewVT.getVectorNumElements();
1545
1546 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1547
// Extract each i64 chunk and reverse it: BITREV_8B for byte-element vectors,
// the generic BITREVERSE otherwise.
1549 for (unsigned int i = 0; i < NewEltNum; i++) {
1550 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1551 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1552 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1553 ? (unsigned)LoongArchISD::BITREV_8B
1554 : (unsigned)ISD::BITREVERSE;
1555 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1556 }
1557 SDValue Res =
1558 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1559
1560 switch (ResTy.getSimpleVT().SimpleTy) {
1561 default:
1562 return SDValue();
1563 case MVT::v16i8:
1564 case MVT::v32i8:
1565 return Res;
1566 case MVT::v8i16:
1567 case MVT::v16i16:
1568 case MVT::v4i32:
1569 case MVT::v8i32: {
// The mask lists, for each i64 chunk i, its element indices from highest to
// lowest, i.e. it reverses element order within every chunk.
1571 for (unsigned int i = 0; i < NewEltNum; i++)
1572 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1573 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1574 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1575 }
1576 }
1577}
1578
1579// Widen element type to get a new mask value (if possible).
1580// For example:
1581// shufflevector <4 x i32> %a, <4 x i32> %b,
1582// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1583// is equivalent to:
1584// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1585// can be lowered to:
1586// VPACKOD_D vr0, vr0, vr1
//
// Returns the shuffle rebuilt on the double-width element type (bitcast back
// to VT), or an empty SDValue if the elements are wider than 32 bits or
// exactly 1 bit, the mask cannot be widened, or the widened type is not legal.
// NOTE(review): this listing skips source line 1587, which should carry the
// function name and leading parameters (DL, Mask and VT are used below).
1588 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1589 unsigned EltBits = VT.getScalarSizeInBits();
1590
1591 if (EltBits > 32 || EltBits == 1)
1592 return SDValue();
1593
1594 SmallVector<int, 8> NewMask;
1595 if (widenShuffleMaskElts(Mask, NewMask)) {
// Double the element width, keep the int/FP kind, and halve the element count.
1596 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1597 : MVT::getIntegerVT(EltBits * 2);
1598 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1599 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1600 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1601 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1602 return DAG.getBitcast(
1603 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1604 }
1605 }
1606
1607 return SDValue();
1608}
1609
1610/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1611/// instruction.
1612// The function matches elements from one of the input vectors shuffled to the
1613// left or right with zeroable elements 'shifted in'. It handles both the
1614// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1615// lane.
1616// Mostly copied from X86.
//
// On success returns the positive shift amount (in bytes for the byte-shift
// opcodes, in bits otherwise) and sets ShiftVT and Opcode; returns -1 when
// nothing matches. MaskOffset is added to the expected source indices.
1617static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1618 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1619 int MaskOffset, const APInt &Zeroable) {
1620 int Size = Mask.size();
1621 unsigned SizeInBits = Size * ScalarSizeInBits;
1622
// True if, in every group of Scale elements, the Shift positions that a shift
// would fill (low end for Left, high end otherwise) are all zeroable.
1623 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1624 for (int i = 0; i < Size; i += Scale)
1625 for (int j = 0; j < Shift; ++j)
1626 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1627 return false;
1628
1629 return true;
1630 };
1631
// True if Mask[Pos .. Pos+Size) is Low, Low+Step, ... with -1 entries
// accepted anywhere.
1632 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1633 int Step = 1) {
1634 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1635 if (!(Mask[i] == -1 || Mask[i] == Low))
1636 return false;
1637 return true;
1638 };
1639
// Checks the non-zero part of each group and, on success, fills in Opcode and
// ShiftVT and returns the shift amount; returns -1 on mismatch.
1640 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1641 for (int i = 0; i != Size; i += Scale) {
1642 unsigned Pos = Left ? i + Shift : i;
1643 unsigned Low = Left ? i : i + Shift;
1644 unsigned Len = Scale - Shift;
1645 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1646 return -1;
1647 }
1648
// Groups wider than 64 bits use the byte-shift opcodes.
1649 int ShiftEltBits = ScalarSizeInBits * Scale;
1650 bool ByteShift = ShiftEltBits > 64;
1651 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1652 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1653 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1654
1655 // Normalize the scale for byte shifts to still produce an i64 element
1656 // type.
1657 Scale = ByteShift ? Scale / 2 : Scale;
1658
1659 // We need to round trip through the appropriate type for the shift.
1660 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1661 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1662 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1663 return (int)ShiftAmt;
1664 };
1665
// Try group sizes 2, 4, ... up to 128 bits wide, every shift count within the
// group, and both directions; the first positive match wins.
1666 unsigned MaxWidth = 128;
1667 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1668 for (int Shift = 1; Shift != Scale; ++Shift)
1669 for (bool Left : {true, false})
1670 if (CheckZeros(Shift, Scale, Left)) {
1671 int ShiftAmt = MatchShift(Shift, Scale, Left);
1672 if (0 < ShiftAmt)
1673 return ShiftAmt;
1674 }
1675
1676 // no match
1677 return -1;
1678}
1679
1680/// Lower VECTOR_SHUFFLE as shift (if possible).
1681///
1682/// For example:
1683/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1684/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1685/// is lowered to:
1686/// (VBSLL_V $v0, $v0, 4)
1687///
1688/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1689/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1690/// is lowered to:
1691/// (VSLLI_D $v0, $v0, 32)
///
/// Returns the shifted value bitcast back to VT, or an empty SDValue if the
/// mask matches a shift of neither V1 nor V2.
// NOTE(review): this listing skips source line 1692, which should carry the
// function name and leading parameters (DL and Mask are used below).
1693 MVT VT, SDValue V1, SDValue V2,
1694 SelectionDAG &DAG,
1695 const LoongArchSubtarget &Subtarget,
1696 const APInt &Zeroable) {
1697 int Size = Mask.size();
1698 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1699
1700 MVT ShiftVT;
1701 SDValue V = V1;
1702 unsigned Opcode;
1703
1704 // Try to match shuffle against V1 shift.
1705 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1706 Mask, 0, Zeroable);
1707
1708 // If V1 failed, try to match shuffle against V2 shift.
// V2's elements are numbered from Size upward in the mask, hence the offset.
1709 if (ShiftAmt < 0) {
1710 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1711 Mask, Size, Zeroable);
1712 V = V2;
1713 }
1714
1715 if (ShiftAmt < 0)
1716 return SDValue();
1717
1718 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1719 "Illegal integer vector type");
// Perform the shift in ShiftVT, then cast the result back to the shuffle type.
1720 V = DAG.getBitcast(ShiftVT, V);
1721 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1722 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1723 return DAG.getBitcast(VT, V);
1724}
1725
1726/// Determine whether a range fits a regular pattern of values.
1727/// This function accounts for the possibility of jumping over the End iterator.
///
/// Starting at Begin and visiting every CheckStride-th entry until End, each
/// visited entry must be -1 or equal the expected value, which starts at
/// ExpectedIndex and grows by ExpectedIndexStride per visited entry.
// NOTE(review): this listing skips source lines 1730 and 1732, which should
// carry the function name and the declarations of `Begin` and `End`.
1728template <typename ValType>
1729static bool
1731 unsigned CheckStride,
1733 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1734 auto &I = Begin;
1735
1736 while (I != End) {
1737 if (*I != -1 && *I != ExpectedIndex)
1738 return false;
1739 ExpectedIndex += ExpectedIndexStride;
1740
1741 // Incrementing past End is undefined behaviour so we must increment one
1742 // step at a time and check for End at each step.
1743 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1744 ; // Empty loop body.
1745 }
1746 return true;
1747}
1748
1749/// Compute whether each element of a shuffle is zeroable.
1750///
1751/// A "zeroable" vector shuffle element is one which can be lowered to zero.
///
/// On return, bit i of KnownUndef is set when Mask[i] is negative, and bit i
/// of KnownZero is set when Mask[i] selects from an input that is an all-zeros
/// build vector (looking through bitcasts). Both have Mask.size() bits.
// NOTE(review): this listing skips source line 1752, which should carry the
// function name and leading parameters (Mask and V1 are used below).
1753 SDValue V2, APInt &KnownUndef,
1754 APInt &KnownZero) {
1755 int Size = Mask.size();
1756 KnownUndef = KnownZero = APInt::getZero(Size);
1757
1758 V1 = peekThroughBitcasts(V1);
1759 V2 = peekThroughBitcasts(V2);
1760
1761 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1762 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1763
// ScalarSizeInBits is only used by the assert; the (void) cast below keeps it
// referenced when the assert compiles away.
1764 int VectorSizeInBits = V1.getValueSizeInBits();
1765 int ScalarSizeInBits = VectorSizeInBits / Size;
1766 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1767 (void)ScalarSizeInBits;
1768
1769 for (int i = 0; i < Size; ++i) {
1770 int M = Mask[i];
1771 if (M < 0) {
1772 KnownUndef.setBit(i);
1773 continue;
1774 }
// Indices [0, Size) refer to V1, indices >= Size refer to V2.
1775 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1776 KnownZero.setBit(i);
1777 continue;
1778 }
1779 }
1780}
1781
1782/// Test whether a shuffle mask is equivalent within each sub-lane.
1783///
1784/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1785/// non-trivial to compute in the face of undef lanes. The representation is
1786/// suitable for use with existing 128-bit shuffles as entries from the second
1787/// vector have been remapped to [LaneSize, 2*LaneSize).
///
/// Returns false if any defined entry reads from a different lane than the
/// one it is written to, or if two lanes disagree on a defined entry.
/// \p RepeatedMask is reset to LaneSize entries of -1 before matching and may
/// be partially filled when false is returned.
1788static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1789 ArrayRef<int> Mask,
1790 SmallVectorImpl<int> &RepeatedMask) {
// Number of elements per lane.
1791 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1792 RepeatedMask.assign(LaneSize, -1);
1793 int Size = Mask.size();
1794 for (int i = 0; i < Size; ++i) {
1795 assert(Mask[i] == -1 || Mask[i] >= 0);
1796 if (Mask[i] < 0)
1797 continue;
// `Mask[i] % Size` strips the which-input offset before comparing lanes.
1798 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1799 // This entry crosses lanes, so there is no way to model this shuffle.
1800 return false;
1801
1802 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1803 // Adjust second vector indices to start at LaneSize instead of Size.
1804 int LocalM =
1805 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1806 if (RepeatedMask[i % LaneSize] < 0)
1807 // This is the first non-undef entry in this slot of a 128-bit lane.
1808 RepeatedMask[i % LaneSize] = LocalM;
1809 else if (RepeatedMask[i % LaneSize] != LocalM)
1810 // Found a mismatch with the repeated mask.
1811 return false;
1812 }
1813 return true;
1814}
1815
1816/// Attempts to match vector shuffle as byte rotation.
///
/// Returns the rotation amount in bytes (Rotation * Scale, with Scale being
/// 16 / elements-per-128-bit-lane), or -1 if the mask is not a rotation. On
/// success V1 and V2 are overwritten with the Lo and Hi inputs of the rotate.
// NOTE(review): this listing skips source line 1817, which should carry the
// function name and leading parameters (VT, V1 and V2 are used below; V1/V2
// are presumably taken by reference since they are assigned at the end).
1818 ArrayRef<int> Mask) {
1819
1820 SDValue Lo, Hi;
1821 SmallVector<int, 16> RepeatedMask;
1822
// The mask must repeat identically in every 128-bit lane.
1823 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1824 return -1;
1825
1826 int NumElts = RepeatedMask.size();
1827 int Rotation = 0;
1828 int Scale = 16 / NumElts;
1829
1830 for (int i = 0; i < NumElts; ++i) {
1831 int M = RepeatedMask[i];
1832 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1833 "Unexpected mask index.");
1834 if (M < 0)
1835 continue;
1836
1837 // Determine where a rotated vector would have started.
1838 int StartIdx = i - (M % NumElts);
// An element that is already in place means a zero rotation; reject it.
1839 if (StartIdx == 0)
1840 return -1;
1841
1842 // If we found the tail of a vector the rotation must be the missing
1843 // front. If we found the head of a vector, it must be how much of the
1844 // head.
1845 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1846
// Every defined element must agree on one rotation amount.
1847 if (Rotation == 0)
1848 Rotation = CandidateRotation;
1849 else if (Rotation != CandidateRotation)
1850 return -1;
1851
1852 // Compute which value this mask is pointing at.
1853 SDValue MaskV = M < NumElts ? V1 : V2;
1854
1855 // Compute which of the two target values this index should be assigned
1856 // to. This reflects whether the high elements are remaining or the low
1857 // elements are remaining.
1858 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1859
1860 // Either set up this value if we've not encountered it before, or check
1861 // that it remains consistent.
1862 if (!TargetV)
1863 TargetV = MaskV;
1864 else if (TargetV != MaskV)
1865 return -1;
1866 }
1867
1868 // Check that we successfully analyzed the mask, and normalize the results.
1869 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1870 assert((Lo || Hi) && "Failed to find a rotated input vector!");
// If only one side was seen, use the same input for both halves.
1871 if (!Lo)
1872 Lo = Hi;
1873 else if (!Hi)
1874 Hi = Lo;
1875
1876 V1 = Lo;
1877 V2 = Hi;
1878
1879 return Rotation * Scale;
1880}
1881
1882/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1883///
1884/// For example:
1885/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1886/// <2 x i32> <i32 3, i32 0>
1887/// is lowered to:
1888/// (VBSRL_V $v1, $v1, 8)
1889/// (VBSLL_V $v0, $v0, 8)
1890/// (VOR_V $v0, $V0, $v1)
///
/// Returns an empty SDValue when the mask is not a byte rotation.
// NOTE(review): this listing skips source line 1892, which should carry the
// function name and leading parameters (DL, Mask and VT are used below).
1891static SDValue
1893 SDValue V1, SDValue V2, SelectionDAG &DAG,
1894 const LoongArchSubtarget &Subtarget) {
1895
// On a successful match Lo and Hi are replaced by the inputs chosen by the
// matcher.
1896 SDValue Lo = V1, Hi = V2;
1897 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1898 if (ByteRotation <= 0)
1899 return SDValue();
1900
1901 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1902 Lo = DAG.getBitcast(ByteVT, Lo);
1903 Hi = DAG.getBitcast(ByteVT, Hi);
1904
// Lo is shifted left by (16 - rotation) bytes, Hi right by the rotation, and
// the two are OR-ed together.
1905 int LoByteShift = 16 - ByteRotation;
1906 int HiByteShift = ByteRotation;
1907 MVT GRLenVT = Subtarget.getGRLenVT();
1908
1909 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1910 DAG.getConstant(LoByteShift, DL, GRLenVT));
1911 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1912 DAG.getConstant(HiByteShift, DL, GRLenVT));
1913 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1914}
1915
1916/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1917///
1918/// For example:
1919/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1920/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1921/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1922/// is lowered to:
1923/// (VREPLI $v1, 0)
1924/// (VILVL $v0, $v1, $v0)
///
/// Returns a zero constant when every element is zeroable, the interleave
/// sequence when some extension scale matches, and an empty SDValue otherwise.
// NOTE(review): this listing skips source line 1925, which should carry the
// function name and leading parameters (DL is used below).
1926 ArrayRef<int> Mask, MVT VT,
1927 SDValue V1, SDValue V2,
1928 SelectionDAG &DAG,
1929 const APInt &Zeroable) {
1930 int Bits = VT.getSizeInBits();
1931 int EltBits = VT.getScalarSizeInBits();
1932 int NumElements = VT.getVectorNumElements();
1933
1934 if (Zeroable.isAllOnes())
1935 return DAG.getConstant(0, DL, VT);
1936
1937 // Define a helper function to check a particular ext-scale and lower to it if
1938 // valid.
// The lambda captures by reference and updates EltBits and NumElements in its
// lowering loop, so later calls see the values a successful match left behind;
// the caller returns on the first success.
1939 auto Lower = [&](int Scale) -> SDValue {
1940 SDValue InputV;
1941 bool AnyExt = true;
1942 int Offset = 0;
1943 for (int i = 0; i < NumElements; i++) {
1944 int M = Mask[i];
1945 if (M < 0)
1946 continue;
1947 if (i % Scale != 0) {
1948 // Each of the extended elements need to be zeroable.
1949 if (!Zeroable[i])
1950 return SDValue();
1951
// A defined (zeroable) padding element rules out the any-extend form.
1952 AnyExt = false;
1953 continue;
1954 }
1955
1956 // Each of the base elements needs to be consecutive indices into the
1957 // same input vector.
1958 SDValue V = M < NumElements ? V1 : V2;
1959 M = M % NumElements;
1960 if (!InputV) {
1961 InputV = V;
1962 Offset = M - (i / Scale);
1963
1964 // These offset can't be handled
1965 if (Offset % (NumElements / Scale))
1966 return SDValue();
1967 } else if (InputV != V)
1968 return SDValue();
1969
1970 if (M != (Offset + (i / Scale)))
1971 return SDValue(); // Non-consecutive strided elements.
1972 }
1973
1974 // If we fail to find an input, we have a zero-shuffle which should always
1975 // have already been handled.
1976 if (!InputV)
1977 return SDValue();
1978
// Each pass doubles the element width by interleaving with either the input
// itself (any-extend) or zero; VILVH is used when the source elements sit in
// the upper half.
1979 do {
1980 unsigned VilVLoHi = LoongArchISD::VILVL;
1981 if (Offset >= (NumElements / 2)) {
1982 VilVLoHi = LoongArchISD::VILVH;
1983 Offset -= (NumElements / 2);
1984 }
1985
1986 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1987 SDValue Ext =
1988 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1989 InputV = DAG.getBitcast(InputVT, InputV);
1990 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1991 Scale /= 2;
1992 EltBits *= 2;
1993 NumElements /= 2;
1994 } while (Scale > 1);
1995 return DAG.getBitcast(VT, InputV);
1996 };
1997
1998 // Each iteration, try extending the elements half as much, but into twice as
1999 // many elements.
2000 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2001 NumExtElements *= 2) {
2002 if (SDValue V = Lower(NumElements / NumExtElements))
2003 return V;
2004 }
2005 return SDValue();
2006}
2007
2008/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2009///
2010/// VREPLVEI performs vector broadcast based on an element specified by an
2011/// integer immediate, with its mask being similar to:
2012/// <x, x, x, ...>
2013/// where x is any valid index.
2014///
2015/// When undef's appear in the mask they are treated as if they were whatever
2016/// value is necessary in order to fit the above form.
///
/// Returns UNDEF when every mask entry is -1, the VREPLVEI node when all
/// defined entries equal the first defined one, and an empty SDValue otherwise.
// NOTE(review): this listing skips source line 2018, which should carry the
// function name and leading parameters (DL, Mask and VT are used below).
2017static SDValue
2019 SDValue V1, SelectionDAG &DAG,
2020 const LoongArchSubtarget &Subtarget) {
// The first defined mask entry is the candidate splat index.
2021 int SplatIndex = -1;
2022 for (const auto &M : Mask) {
2023 if (M != -1) {
2024 SplatIndex = M;
2025 break;
2026 }
2027 }
2028
2029 if (SplatIndex == -1)
2030 return DAG.getUNDEF(VT);
2031
2032 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Stride 1 with index step 0: every entry must be SplatIndex or undef.
2033 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
2034 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2035 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2036 }
2037
2038 return SDValue();
2039}
2040
2041/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2042///
2043/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2044/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2045///
2046/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2047/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2048/// When undef's appear they are treated as if they were whatever value is
2049/// necessary in order to fit the above forms.
2050///
2051/// For example:
2052/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2053/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2054/// i32 7, i32 6, i32 5, i32 4>
2055/// is lowered to:
2056/// (VSHUF4I_H $v0, $v1, 27)
2057/// where the 27 comes from:
2058/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
///
/// For v2f64/v2i64 a two-entry sub-mask is used and VSHUF4I_D is emitted with
/// both V1 and V2; all other types emit VSHUF4I on V1 only. Returns an empty
/// SDValue if the mask does not fit.
// NOTE(review): this listing skips source line 2060, which should carry the
// function name and leading parameters (DL, Mask and VT are used below).
2059static SDValue
2061 SDValue V1, SDValue V2, SelectionDAG &DAG,
2062 const LoongArchSubtarget &Subtarget) {
2063
2064 unsigned SubVecSize = 4;
2065 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2066 SubVecSize = 2;
2067
// Fold every block's entries into a single per-position sub-mask.
2068 int SubMask[4] = {-1, -1, -1, -1};
2069 for (unsigned i = 0; i < SubVecSize; ++i) {
2070 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2071 int M = Mask[j];
2072
2073 // Convert from vector index to 4-element subvector index
2074 // If an index refers to an element outside of the subvector then give up
// NOTE(review): the base offset uses a literal 4 even when SubVecSize is 2;
// with a two-entry mask j / SubVecSize is always 0, so indices 0..3 are
// accepted there -- confirm this is the intended range for VSHUF4I_D.
2075 if (M != -1) {
2076 M -= 4 * (j / SubVecSize);
2077 if (M < 0 || M >= 4)
2078 return SDValue();
2079 }
2080
2081 // If the mask has an undef, replace it with the current index.
2082 // Note that it might still be undef if the current index is also undef
2083 if (SubMask[i] == -1)
2084 SubMask[i] = M;
2085 // Check that non-undef values are the same as in the mask. If they
2086 // aren't then give up
2087 else if (M != -1 && M != SubMask[i])
2088 return SDValue();
2089 }
2090 }
2091
2092 // Calculate the immediate. Replace any remaining undefs with zero
// Built from the last position down, so position 0 ends up in the low 2 bits.
2093 int Imm = 0;
2094 for (int i = SubVecSize - 1; i >= 0; --i) {
2095 int M = SubMask[i];
2096
2097 if (M == -1)
2098 M = 0;
2099
2100 Imm <<= 2;
2101 Imm |= M & 0x3;
2102 }
2103
2104 MVT GRLenVT = Subtarget.getGRLenVT();
2105
2106 // Return vshuf4i.d
2107 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2108 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2109 DAG.getConstant(Imm, DL, GRLenVT));
2110
2111 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2112 DAG.getConstant(Imm, DL, GRLenVT));
2113}
2114
2115/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2116///
2117/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
2118/// reverse whose mask likes:
2119/// <7, 6, 5, 4, 3, 2, 1, 0>
2120///
2121/// When undef's appear in the mask they are treated as if they were whatever
2122/// value is necessary in order to fit the above forms.
///
/// The reversal is done in two steps: reverse the order of four-element groups
/// with a shuffle on a 4x wider element type, then reverse the elements inside
/// each group with VSHUF4I. Returns an empty SDValue for other types or masks.
// NOTE(review): this listing skips source line 2124, which should carry the
// function name and leading parameters (DL, Mask and VT are used below).
2123static SDValue
2125 SDValue V1, SelectionDAG &DAG,
2126 const LoongArchSubtarget &Subtarget) {
2127 // Only vectors with i8/i16 elements which cannot match other patterns
2128 // directly needs to do this.
2129 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2130 VT != MVT::v16i16)
2131 return SDValue();
2132
2133 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2134 return SDValue();
2135
// Build a full reverse mask over groups of four original elements.
2136 int WidenNumElts = VT.getVectorNumElements() / 4;
2137 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2138 for (int i = 0; i < WidenNumElts; ++i)
2139 WidenMask[i] = WidenNumElts - 1 - i;
2140
// Four i8 elements form an i32; four i16 elements form an i64.
2141 MVT WidenVT = MVT::getVectorVT(
2142 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2143 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2144 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2145 DAG.getUNDEF(WidenVT), WidenMask);
2146
// Immediate 27 = 3 + (2 << 2) + (1 << 4) + (0 << 6), i.e. sub-mask <3, 2, 1, 0>.
2147 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2148 DAG.getBitcast(VT, WidenRev),
2149 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2150}
2151
2152/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2153///
2154/// VPACKEV interleaves the even elements from each vector.
2155///
2156/// It is possible to lower into VPACKEV when the mask consists of two of the
2157/// following forms interleaved:
2158/// <0, 2, 4, ...>
2159/// <n, n+2, n+4, ...>
2160/// where n is the number of elements in the vector.
2161/// For example:
2162/// <0, 0, 2, 2, 4, 4, ...>
2163/// <0, n, 2, n+2, 4, n+4, ...>
2164///
2165/// When undef's appear in the mask they are treated as if they were whatever
2166/// value is necessary in order to fit the above forms.
///
/// Returns an empty SDValue when either the even or the odd mask positions do
/// not fit one of the two forms.
// NOTE(review): this listing skips source line 2167, which should carry the
// function name and leading parameters (DL and Mask are used below).
2168 MVT VT, SDValue V1, SDValue V2,
2169 SelectionDAG &DAG) {
2170
2171 const auto &Begin = Mask.begin();
2172 const auto &End = Mask.end();
2173 SDValue OriV1 = V1, OriV2 = V2;
2174
// Even mask positions pick which input becomes V1.
2175 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2176 V1 = OriV1;
2177 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2178 V1 = OriV2;
2179 else
2180 return SDValue();
2181
// Odd mask positions pick which input becomes V2.
2182 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2183 V2 = OriV1;
2184 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2185 V2 = OriV2;
2186 else
2187 return SDValue();
2188
// Note the operand order: V2 is passed before V1.
2189 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2190}
2191
2192/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2193///
2194/// VPACKOD interleaves the odd elements from each vector.
2195///
2196/// It is possible to lower into VPACKOD when the mask consists of two of the
2197/// following forms interleaved:
2198/// <1, 3, 5, ...>
2199/// <n+1, n+3, n+5, ...>
2200/// where n is the number of elements in the vector.
2201/// For example:
2202/// <1, 1, 3, 3, 5, 5, ...>
2203/// <1, n+1, 3, n+3, 5, n+5, ...>
2204///
2205/// When undef's appear in the mask they are treated as if they were whatever
2206/// value is necessary in order to fit the above forms.
///
/// Returns an empty SDValue when either the even or the odd mask positions do
/// not fit one of the two forms.
// NOTE(review): this listing skips source line 2207, which should carry the
// function name and leading parameters (DL and Mask are used below).
2208 MVT VT, SDValue V1, SDValue V2,
2209 SelectionDAG &DAG) {
2210
2211 const auto &Begin = Mask.begin();
2212 const auto &End = Mask.end();
2213 SDValue OriV1 = V1, OriV2 = V2;
2214
// Even mask positions pick which input becomes V1.
2215 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2216 V1 = OriV1;
2217 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2218 V1 = OriV2;
2219 else
2220 return SDValue();
2221
// Odd mask positions pick which input becomes V2.
2222 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2223 V2 = OriV1;
2224 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2225 V2 = OriV2;
2226 else
2227 return SDValue();
2228
// Note the operand order: V2 is passed before V1.
2229 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2230}
2231
2232/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2233///
2234/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2235/// of each vector.
2236///
2237/// It is possible to lower into VILVH when the mask consists of two of the
2238/// following forms interleaved:
2239/// <x, x+1, x+2, ...>
2240/// <n+x, n+x+1, n+x+2, ...>
2241/// where n is the number of elements in the vector and x is half n.
2242/// For example:
2243/// <x, x, x+1, x+1, x+2, x+2, ...>
2244/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2245///
2246/// When undef's appear in the mask they are treated as if they were whatever
2247/// value is necessary in order to fit the above forms.
2249 MVT VT, SDValue V1, SDValue V2,
2250 SelectionDAG &DAG) {
2251
2252 const auto &Begin = Mask.begin();
2253 const auto &End = Mask.end();
2254 unsigned HalfSize = Mask.size() / 2;
2255 SDValue OriV1 = V1, OriV2 = V2;
2256
2257 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2258 V1 = OriV1;
2259 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2260 V1 = OriV2;
2261 else
2262 return SDValue();
2263
2264 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2265 V2 = OriV1;
2266 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2267 1))
2268 V2 = OriV2;
2269 else
2270 return SDValue();
2271
2272 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2273}
2274
2275/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2276///
2277/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2278/// of each vector.
2279///
2280/// It is possible to lower into VILVL when the mask consists of two of the
2281/// following forms interleaved:
2282/// <0, 1, 2, ...>
2283/// <n, n+1, n+2, ...>
2284/// where n is the number of elements in the vector.
2285/// For example:
2286/// <0, 0, 1, 1, 2, 2, ...>
2287/// <0, n, 1, n+1, 2, n+2, ...>
2288///
2289/// When undef's appear in the mask they are treated as if they were whatever
2290/// value is necessary in order to fit the above forms.
2292 MVT VT, SDValue V1, SDValue V2,
2293 SelectionDAG &DAG) {
2294
2295 const auto &Begin = Mask.begin();
2296 const auto &End = Mask.end();
2297 SDValue OriV1 = V1, OriV2 = V2;
2298
2299 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2300 V1 = OriV1;
2301 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2302 V1 = OriV2;
2303 else
2304 return SDValue();
2305
2306 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2307 V2 = OriV1;
2308 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2309 V2 = OriV2;
2310 else
2311 return SDValue();
2312
2313 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2314}
2315
2316/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2317///
2318/// VPICKEV copies the even elements of each vector into the result vector.
2319///
2320/// It is possible to lower into VPICKEV when the mask consists of two of the
2321/// following forms concatenated:
2322/// <0, 2, 4, ...>
2323/// <n, n+2, n+4, ...>
2324/// where n is the number of elements in the vector.
2325/// For example:
2326/// <0, 2, 4, ..., 0, 2, 4, ...>
2327/// <0, 2, 4, ..., n, n+2, n+4, ...>
2328///
2329/// When undef's appear in the mask they are treated as if they were whatever
2330/// value is necessary in order to fit the above forms.
2332 MVT VT, SDValue V1, SDValue V2,
2333 SelectionDAG &DAG) {
2334
2335 const auto &Begin = Mask.begin();
2336 const auto &Mid = Mask.begin() + Mask.size() / 2;
2337 const auto &End = Mask.end();
2338 SDValue OriV1 = V1, OriV2 = V2;
2339
2340 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2341 V1 = OriV1;
2342 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2343 V1 = OriV2;
2344 else
2345 return SDValue();
2346
2347 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2348 V2 = OriV1;
2349 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2350 V2 = OriV2;
2351
2352 else
2353 return SDValue();
2354
2355 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2356}
2357
2358/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2359///
2360/// VPICKOD copies the odd elements of each vector into the result vector.
2361///
2362/// It is possible to lower into VPICKOD when the mask consists of two of the
2363/// following forms concatenated:
2364/// <1, 3, 5, ...>
2365/// <n+1, n+3, n+5, ...>
2366/// where n is the number of elements in the vector.
2367/// For example:
2368/// <1, 3, 5, ..., 1, 3, 5, ...>
2369/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2370///
2371/// When undef's appear in the mask they are treated as if they were whatever
2372/// value is necessary in order to fit the above forms.
2374 MVT VT, SDValue V1, SDValue V2,
2375 SelectionDAG &DAG) {
2376
2377 const auto &Begin = Mask.begin();
2378 const auto &Mid = Mask.begin() + Mask.size() / 2;
2379 const auto &End = Mask.end();
2380 SDValue OriV1 = V1, OriV2 = V2;
2381
2382 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2383 V1 = OriV1;
2384 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2385 V1 = OriV2;
2386 else
2387 return SDValue();
2388
2389 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2390 V2 = OriV1;
2391 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2392 V2 = OriV2;
2393 else
2394 return SDValue();
2395
2396 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2397}
2398
2399/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2400///
2401/// VEXTRINS copies one element of a vector into any place of the result
2402/// vector and makes no change to the rest elements of the result vector.
2403///
2404/// It is possible to lower into VEXTRINS when the mask takes the form:
2405/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2406/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2407/// where n is the number of elements in the vector and i is in [0, n).
2408/// For example:
2409/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2410/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2411///
2412/// When undef's appear in the mask they are treated as if they were whatever
2413/// value is necessary in order to fit the above forms.
2414static SDValue
2416 SDValue V1, SDValue V2, SelectionDAG &DAG,
2417 const LoongArchSubtarget &Subtarget) {
2418 unsigned NumElts = VT.getVectorNumElements();
2419 MVT EltVT = VT.getVectorElementType();
2420 MVT GRLenVT = Subtarget.getGRLenVT();
2421
2422 if (Mask.size() != NumElts)
2423 return SDValue();
2424
2425 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2426 int DiffCount = 0;
2427 int DiffPos = -1;
2428 for (unsigned i = 0; i < NumElts; ++i) {
2429 if (Mask[i] == -1)
2430 continue;
2431 if (Mask[i] != int(Base + i)) {
2432 ++DiffCount;
2433 DiffPos = int(i);
2434 if (DiffCount > 1)
2435 return SDValue();
2436 }
2437 }
2438
2439 // Need exactly one differing element to lower into VEXTRINS.
2440 if (DiffCount != 1)
2441 return SDValue();
2442
2443 // DiffMask must be in [0, 2N).
2444 int DiffMask = Mask[DiffPos];
2445 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2446 return SDValue();
2447
2448 // Determine source vector and source index.
2449 SDValue SrcVec;
2450 unsigned SrcIdx;
2451 if (unsigned(DiffMask) < NumElts) {
2452 SrcVec = V1;
2453 SrcIdx = unsigned(DiffMask);
2454 } else {
2455 SrcVec = V2;
2456 SrcIdx = unsigned(DiffMask) - NumElts;
2457 }
2458
2459 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2460 // patterns of VEXTRINS in tablegen.
2461 SDValue Extracted = DAG.getNode(
2462 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2463 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2464 SDValue Result =
2465 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2466 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2467
2468 return Result;
2469 };
2470
2471 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2472 if (SDValue Result = tryLowerToExtrAndIns(0))
2473 return Result;
2474 return tryLowerToExtrAndIns(NumElts);
2475}
2476
2477// Check the Mask and then build SrcVec and MaskImm infos which will
2478// be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2479// On success, return true. Otherwise, return false.
2482 unsigned &MaskImm) {
2483 unsigned MaskSize = Mask.size();
2484
2485 auto isValid = [&](int M, int Off) {
2486 return (M == -1) || (M >= Off && M < Off + 4);
2487 };
2488
2489 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2490 auto immPart = [&](int M, unsigned Off) {
2491 return (M == -1 ? 0 : (M - Off)) & 0x3;
2492 };
2493 MaskImm |= immPart(MLo, Off) << (I * 2);
2494 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2495 };
2496
2497 for (unsigned i = 0; i < 4; i += 2) {
2498 int MLo = Mask[i];
2499 int MHi = Mask[i + 1];
2500
2501 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
2502 int M2Lo = Mask[i + 4];
2503 int M2Hi = Mask[i + 5];
2504 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2505 return false;
2506 }
2507
2508 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2509 SrcVec.push_back(V1);
2510 buildImm(MLo, MHi, 0, i);
2511 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2512 SrcVec.push_back(V2);
2513 buildImm(MLo, MHi, MaskSize, i);
2514 } else {
2515 return false;
2516 }
2517 }
2518
2519 return true;
2520}
2521
2522/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2523///
2524/// VPERMI selects two elements from each of the two vectors based on the
2525/// mask and places them in the corresponding positions of the result vector
2526/// in order. Only v4i32 and v4f32 types are allowed.
2527///
2528/// It is possible to lower into VPERMI when the mask consists of two of the
2529/// following forms concatenated:
2530/// <i, j, u, v>
2531/// <u, v, i, j>
2532/// where i,j are in [0,4) and u,v are in [4, 8).
2533/// For example:
2534/// <2, 3, 4, 5>
2535/// <5, 7, 0, 2>
2536///
2537/// When undef's appear in the mask they are treated as if they were whatever
2538/// value is necessary in order to fit the above forms.
2540 MVT VT, SDValue V1, SDValue V2,
2541 SelectionDAG &DAG,
2542 const LoongArchSubtarget &Subtarget) {
2543 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2544 Mask.size() != VT.getVectorNumElements())
2545 return SDValue();
2546
2548 unsigned MaskImm = 0;
2549 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2550 return SDValue();
2551
2552 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2553 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2554}
2555
2556/// Lower VECTOR_SHUFFLE into VSHUF.
2557///
2558/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2559/// adding it as an operand to the resulting VSHUF.
2561 MVT VT, SDValue V1, SDValue V2,
2562 SelectionDAG &DAG,
2563 const LoongArchSubtarget &Subtarget) {
2564
2566 for (auto M : Mask)
2567 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2568
2569 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2570 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2571
2572 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2573 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2574 // VSHF concatenates the vectors in a bitwise fashion:
2575 // <0b00, 0b01> + <0b10, 0b11> ->
2576 // 0b0100 + 0b1110 -> 0b01001110
2577 // <0b10, 0b11, 0b00, 0b01>
2578 // We must therefore swap the operands to get the correct result.
2579 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2580}
2581
2582/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2583///
2584/// This routine breaks down the specific type of 128-bit shuffle and
2585/// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first one that
/// produces a node is returned; an empty SDValue is returned when none of
/// them applies. The single-input lowerings (VREPLVEI, VSHUF4I, reverse) are
/// only attempted when V2 is undef.
///
/// VT must be one of v16i8, v8i16, v4i32, v2i64, v4f32 or v2f64, and Mask
/// must hold one entry per vector element (an even count); both are asserted.
// NOTE(review): the line carrying the return type, the function name and the
// leading parameters (the body uses DL, Mask and VT) is not present in this
// listing.
2587 SDValue V1, SDValue V2, SelectionDAG &DAG,
2588 const LoongArchSubtarget &Subtarget) {
2589 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2590 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2591 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2592 "Vector type is unsupported for lsx!");
// NOTE(review): the line that opens the next assert (the operand-type check
// its message refers to) is not present in this listing.
2594 "Two operands have different types!");
2595 assert(VT.getVectorNumElements() == Mask.size() &&
2596 "Unexpected mask size for shuffle!");
2597 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2598
// Zeroable marks the result elements that are known undef or known zero; it
// is consumed by the shift and zero/any-extend lowerings further down.
2599 APInt KnownUndef, KnownZero;
2600 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2601 APInt Zeroable = KnownUndef | KnownZero;
2602
2603 SDValue Result;
2604 // TODO: Add more comparison patterns.
// Single-input shuffles: try the lowerings that only read V1 first.
2605 if (V2.isUndef()) {
2606 if ((Result =
2607 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2608 return Result;
2609 if ((Result =
2610 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2611 return Result;
2612 if ((Result =
2613 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2614 return Result;
2615
2616 // TODO: This comment may be enabled in the future to better match the
2617 // pattern for instruction selection.
2618 /* V2 = V1; */
2619 }
2620
2621 // It is recommended not to change the pattern comparison order for better
2622 // performance.
2623 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2624 return Result;
2625 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2626 return Result;
2627 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2628 return Result;
2629 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2630 return Result;
2631 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2632 return Result;
2633 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2634 return Result;
// At this point VSHUF4I is only attempted for the 64-bit element types.
2635 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2636 (Result =
2637 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2638 return Result;
2639 if ((Result =
2640 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2643 Zeroable)))
2644 return Result;
2645 if ((Result =
2646 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2649 Zeroable)))
2650 return Result;
2651 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2652 Subtarget)))
2653 return Result;
// widenShuffleMask is defined outside this view; presumably it re-expresses
// the shuffle with wider elements - TODO confirm.
2654 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2655 return NewShuffle;
// Last resort: VSHUF, which takes the mask as a vector operand.
2656 if ((Result =
2657 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2658 return Result;
2659 return SDValue();
2660}
2661
2662/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2663///
2664/// It is a XVREPLVEI when the mask is:
2665/// <x, x, x, ..., x+n, x+n, x+n, ...>
2666/// where the number of x is equal to n and n is half the length of vector.
2667///
2668/// When undef's appear in the mask they are treated as if they were whatever
2669/// value is necessary in order to fit the above form.
2670static SDValue
2672 SDValue V1, SelectionDAG &DAG,
2673 const LoongArchSubtarget &Subtarget) {
2674 int SplatIndex = -1;
2675 for (const auto &M : Mask) {
2676 if (M != -1) {
2677 SplatIndex = M;
2678 break;
2679 }
2680 }
2681
2682 if (SplatIndex == -1)
2683 return DAG.getUNDEF(VT);
2684
2685 const auto &Begin = Mask.begin();
2686 const auto &End = Mask.end();
2687 int HalfSize = Mask.size() / 2;
2688
2689 if (SplatIndex >= HalfSize)
2690 return SDValue();
2691
2692 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2693 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2694 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2695 0)) {
2696 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2697 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2698 }
2699
2700 return SDValue();
2701}
2702
2703/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2704static SDValue
2706 SDValue V1, SDValue V2, SelectionDAG &DAG,
2707 const LoongArchSubtarget &Subtarget) {
2708 // XVSHUF4I_D must be handled separately because it is different from other
2709 // types of [X]VSHUF4I instructions.
2710 if (Mask.size() == 4) {
2711 unsigned MaskImm = 0;
2712 for (int i = 1; i >= 0; --i) {
2713 int MLo = Mask[i];
2714 int MHi = Mask[i + 2];
2715 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2716 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2717 return SDValue();
2718 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2719 return SDValue();
2720
2721 MaskImm <<= 2;
2722 if (MLo != -1)
2723 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2724 else if (MHi != -1)
2725 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2726 }
2727
2728 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2729 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2730 }
2731
2732 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2733}
2734
2735/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2736static SDValue
2738 SDValue V1, SDValue V2, SelectionDAG &DAG,
2739 const LoongArchSubtarget &Subtarget) {
2740 MVT GRLenVT = Subtarget.getGRLenVT();
2741 unsigned MaskSize = Mask.size();
2742 if (MaskSize != VT.getVectorNumElements())
2743 return SDValue();
2744
2745 // Consider XVPERMI_W.
2746 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
2748 unsigned MaskImm = 0;
2749 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2750 return SDValue();
2751
2752 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2753 DAG.getConstant(MaskImm, DL, GRLenVT));
2754 }
2755
2756 // Consider XVPERMI_D.
2757 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2758 unsigned MaskImm = 0;
2759 for (unsigned i = 0; i < MaskSize; ++i) {
2760 if (Mask[i] == -1)
2761 continue;
2762 if (Mask[i] >= (int)MaskSize)
2763 return SDValue();
2764 MaskImm |= Mask[i] << (i * 2);
2765 }
2766
2767 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2768 DAG.getConstant(MaskImm, DL, GRLenVT));
2769 }
2770
2771 return SDValue();
2772}
2773
2774/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2776 MVT VT, SDValue V1, SelectionDAG &DAG,
2777 const LoongArchSubtarget &Subtarget) {
2778 // LoongArch LASX only have XVPERM_W.
2779 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2780 return SDValue();
2781
2782 unsigned NumElts = VT.getVectorNumElements();
2783 unsigned HalfSize = NumElts / 2;
2784 bool FrontLo = true, FrontHi = true;
2785 bool BackLo = true, BackHi = true;
2786
2787 auto inRange = [](int val, int low, int high) {
2788 return (val == -1) || (val >= low && val < high);
2789 };
2790
2791 for (unsigned i = 0; i < HalfSize; ++i) {
2792 int Fronti = Mask[i];
2793 int Backi = Mask[i + HalfSize];
2794
2795 FrontLo &= inRange(Fronti, 0, HalfSize);
2796 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2797 BackLo &= inRange(Backi, 0, HalfSize);
2798 BackHi &= inRange(Backi, HalfSize, NumElts);
2799 }
2800
2801 // If both the lower and upper 128-bit parts access only one half of the
2802 // vector (either lower or upper), avoid using xvperm.w. The latency of
2803 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2804 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2805 return SDValue();
2806
2808 MVT GRLenVT = Subtarget.getGRLenVT();
2809 for (unsigned i = 0; i < NumElts; ++i)
2810 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2811 : DAG.getConstant(Mask[i], DL, GRLenVT));
2812 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2813
2814 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2815}
2816
2817/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// Thin forwarder: the mask test and the node construction are delegated to
/// lowerVECTOR_SHUFFLE_VPACKEV with the same arguments, and its result is
/// returned as is.
// NOTE(review): the line carrying the return type, the function name and the
// leading parameters (the body uses DL and Mask) is not present in this
// listing.
2819 MVT VT, SDValue V1, SDValue V2,
2820 SelectionDAG &DAG) {
2821 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2822}
2823
2824/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// Thin forwarder: the mask test and the node construction are delegated to
/// lowerVECTOR_SHUFFLE_VPACKOD with the same arguments, and its result is
/// returned as is.
// NOTE(review): the line carrying the return type, the function name and the
// leading parameters (the body uses DL and Mask) is not present in this
// listing.
2826 MVT VT, SDValue V1, SDValue V2,
2827 SelectionDAG &DAG) {
2828 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2829}
2830
2831/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2833 MVT VT, SDValue V1, SDValue V2,
2834 SelectionDAG &DAG) {
2835
2836 const auto &Begin = Mask.begin();
2837 const auto &End = Mask.end();
2838 unsigned HalfSize = Mask.size() / 2;
2839 unsigned LeftSize = HalfSize / 2;
2840 SDValue OriV1 = V1, OriV2 = V2;
2841
2842 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2843 1) &&
2844 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2845 V1 = OriV1;
2846 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2847 Mask.size() + HalfSize - LeftSize, 1) &&
2848 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2849 Mask.size() + HalfSize + LeftSize, 1))
2850 V1 = OriV2;
2851 else
2852 return SDValue();
2853
2854 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2855 1) &&
2856 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2857 1))
2858 V2 = OriV1;
2859 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2860 Mask.size() + HalfSize - LeftSize, 1) &&
2861 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2862 Mask.size() + HalfSize + LeftSize, 1))
2863 V2 = OriV2;
2864 else
2865 return SDValue();
2866
2867 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2868}
2869
2870/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2872 MVT VT, SDValue V1, SDValue V2,
2873 SelectionDAG &DAG) {
2874
2875 const auto &Begin = Mask.begin();
2876 const auto &End = Mask.end();
2877 unsigned HalfSize = Mask.size() / 2;
2878 SDValue OriV1 = V1, OriV2 = V2;
2879
2880 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2881 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2882 V1 = OriV1;
2883 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2884 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2885 Mask.size() + HalfSize, 1))
2886 V1 = OriV2;
2887 else
2888 return SDValue();
2889
2890 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2891 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2892 V2 = OriV1;
2893 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2894 1) &&
2895 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2896 Mask.size() + HalfSize, 1))
2897 V2 = OriV2;
2898 else
2899 return SDValue();
2900
2901 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2902}
2903
2904/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2906 MVT VT, SDValue V1, SDValue V2,
2907 SelectionDAG &DAG) {
2908
2909 const auto &Begin = Mask.begin();
2910 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2911 const auto &Mid = Mask.begin() + Mask.size() / 2;
2912 const auto &RightMid = Mask.end() - Mask.size() / 4;
2913 const auto &End = Mask.end();
2914 unsigned HalfSize = Mask.size() / 2;
2915 SDValue OriV1 = V1, OriV2 = V2;
2916
2917 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2918 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2919 V1 = OriV1;
2920 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2921 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2922 V1 = OriV2;
2923 else
2924 return SDValue();
2925
2926 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2927 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2928 V2 = OriV1;
2929 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2930 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2931 V2 = OriV2;
2932
2933 else
2934 return SDValue();
2935
2936 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2937}
2938
2939/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2941 MVT VT, SDValue V1, SDValue V2,
2942 SelectionDAG &DAG) {
2943
2944 const auto &Begin = Mask.begin();
2945 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2946 const auto &Mid = Mask.begin() + Mask.size() / 2;
2947 const auto &RightMid = Mask.end() - Mask.size() / 4;
2948 const auto &End = Mask.end();
2949 unsigned HalfSize = Mask.size() / 2;
2950 SDValue OriV1 = V1, OriV2 = V2;
2951
2952 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2953 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2954 V1 = OriV1;
2955 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2956 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2957 2))
2958 V1 = OriV2;
2959 else
2960 return SDValue();
2961
2962 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2963 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2964 V2 = OriV1;
2965 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2966 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2967 2))
2968 V2 = OriV2;
2969 else
2970 return SDValue();
2971
2972 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2973}
2974
2975/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
///
/// With n = number of elements and h = n / 2, the mask must be the identity
/// over one operand (0..n-1 for V1 or n..2n-1 for V2) except at one pair of
/// positions p and p + h, i.e. the same slot in each half of the vector. The
/// pair must read elements s and s + h of a single operand, with s in the
/// first half of that operand. If only one position of the pair deviates
/// from the identity, the other one must be undef.
///
/// The result is built from two EXTRACT_VECTOR_ELT / INSERT_VECTOR_ELT pairs.
/// Returns an empty SDValue when the mask does not fit or when Mask.size()
/// differs from the element count of VT.
// NOTE(review): the line carrying the function name and the leading
// parameters (the body uses DL, Mask and VT) is not present in this listing.
2976static SDValue
2978 SDValue V1, SDValue V2, SelectionDAG &DAG,
2979 const LoongArchSubtarget &Subtarget) {
2980 int NumElts = VT.getVectorNumElements();
2981 int HalfSize = NumElts / 2;
2982 MVT EltVT = VT.getVectorElementType();
2983 MVT GRLenVT = Subtarget.getGRLenVT();
2984
2985 if ((int)Mask.size() != NumElts)
2986 return SDValue();
2987
// Attempt the lowering with the operand whose identity mask starts at Base
// (0 selects V1, NumElts selects V2) as the vector being patched.
2988 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
// Collect the positions that deviate from the identity; more than two can
// never match.
2989 SmallVector<int> DiffPos;
2990 for (int i = 0; i < NumElts; ++i) {
2991 if (Mask[i] == -1)
2992 continue;
2993 if (Mask[i] != Base + i) {
2994 DiffPos.push_back(i);
2995 if (DiffPos.size() > 2)
2996 return SDValue();
2997 }
2998 }
2999
3000 // Need exactly two differing element to lower into XVEXTRINS.
3001 // If only one differing element, the element at a distance of
3002 // HalfSize from it must be undef.
3003 if (DiffPos.size() == 1) {
// Complete the pair with the undef partner, keeping DiffPos ordered as
// {first-half position, second-half position}.
3004 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3005 DiffPos.push_back(DiffPos[0] + HalfSize);
3006 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3007 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
3008 else
3009 return SDValue();
3010 }
// The two positions must be exactly HalfSize apart.
3011 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3012 return SDValue();
3013
3014 // DiffMask must be in its low or high part.
3015 int DiffMaskLo = Mask[DiffPos[0]];
3016 int DiffMaskHi = Mask[DiffPos[1]];
// An undef member of the pair takes the value implied by its partner.
3017 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3018 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
// The first-half entry must name an element in the first half of V1 or V2,
// the second-half entry one in the second half, and the two must be
// HalfSize apart.
3019 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3020 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3021 return SDValue();
3022 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3023 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3024 return SDValue();
3025 if (DiffMaskHi != DiffMaskLo + HalfSize)
3026 return SDValue();
3027
3028 // Determine source vector and source index.
3029 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3030 int SrcIdxLo =
3031 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3032 bool IsEltFP = EltVT.isFloatingPoint();
3033
3034 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3035 // the patterns of XVEXTRINS in tablegen.
// Floating-point elements are extracted with their own type, all others as
// GRLenVT scalars.
3036 SDValue BaseVec = (Base == 0) ? V1 : V2;
3037 SDValue EltLo =
3038 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3039 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3040 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3041 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3042 SDValue EltHi =
3043 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3044 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
3045 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3046 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3047
3048 return Result;
3049 };
3050
3051 // Try [0, n-1) insertion then [n, 2n-1) insertion.
// That is: first with V1 as the patched vector, then with V2.
3052 if (SDValue Result = tryLowerToExtrAndIns(0))
3053 return Result;
3054 return tryLowerToExtrAndIns(NumElts);
3055}
3056
3057/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
3058static SDValue
3060 SDValue V1, SDValue V2, SelectionDAG &DAG,
3061 const LoongArchSubtarget &Subtarget) {
3062 // LoongArch LASX only supports xvinsve0.{w/d}.
3063 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3064 VT != MVT::v4f64)
3065 return SDValue();
3066
3067 MVT GRLenVT = Subtarget.getGRLenVT();
3068 int MaskSize = Mask.size();
3069 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3070
3071 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3072 // all other elements are either 'Base + i' or undef (-1). On success, return
3073 // the index of the replaced element. Otherwise, just return -1.
3074 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3075 int Idx = -1;
3076 for (int i = 0; i < MaskSize; ++i) {
3077 if (Mask[i] == Base + i || Mask[i] == -1)
3078 continue;
3079 if (Mask[i] != Replaced)
3080 return -1;
3081 if (Idx == -1)
3082 Idx = i;
3083 else
3084 return -1;
3085 }
3086 return Idx;
3087 };
3088
3089 // Case 1: the lowest element of V2 replaces one element in V1.
3090 int Idx = checkReplaceOne(0, MaskSize);
3091 if (Idx != -1)
3092 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3093 DAG.getConstant(Idx, DL, GRLenVT));
3094
3095 // Case 2: the lowest element of V1 replaces one element in V2.
3096 Idx = checkReplaceOne(MaskSize, 0);
3097 if (Idx != -1)
3098 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3099 DAG.getConstant(Idx, DL, GRLenVT));
3100
3101 return SDValue();
3102}
3103
3104/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
///
/// The mask is accepted only when every element of its first half selects
/// from the first half of V1 or of V2, and every element of its second half
/// selects from the second half of V1 or of V2. Undef (negative) elements are
/// always accepted and are encoded as index 0. Any other mask makes the
/// function return an empty SDValue.
///
/// On success the rewritten indices are materialized as an integer
/// BUILD_VECTOR and used as the first operand of a LoongArchISD::VSHUF node
/// whose remaining operands are V2 and V1 (in that order).
3106 MVT VT, SDValue V1, SDValue V2,
3107 SelectionDAG &DAG) {
3108
3109 int MaskSize = Mask.size();
3110 int HalfSize = Mask.size() / 2;
3111 const auto &Begin = Mask.begin();
3112 const auto &Mid = Mask.begin() + HalfSize;
3113 const auto &End = Mask.end();
3114
3115 // VECTOR_SHUFFLE concatenates the vectors:
3116 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3117 // shuffling ->
3118 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3119 //
3120 // XVSHUF concatenates the vectors:
3121 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3122 // shuffling ->
3123 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3124 SmallVector<SDValue, 8> MaskAlloc;
 // First half of the mask: only the first halves of V1 and V2 may be used.
3125 for (auto it = Begin; it < Mid; it++) {
3126 if (*it < 0) // UNDEF
3127 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3128 else if ((*it >= 0 && *it < HalfSize) ||
3129 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
 // V1 indices are kept as is; V2 indices are rebased by -HalfSize.
3130 int M = *it < HalfSize ? *it : *it - HalfSize;
3131 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3132 } else
3133 return SDValue();
3134 }
3135 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3136
 // Second half of the mask: only the second halves of V1 and V2 may be used.
3137 for (auto it = Mid; it < End; it++) {
3138 if (*it < 0) // UNDEF
3139 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3140 else if ((*it >= HalfSize && *it < MaskSize) ||
3141 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
 // V1 indices are rebased by -HalfSize; V2 indices by -MaskSize.
3142 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3143 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3144 } else
3145 return SDValue();
3146 }
3147 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3148
 // The mask operand uses the integer vector type with the same layout as VT.
3149 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3150 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
3151 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3152}
3153
3154/// Shuffle vectors by lane to generate more optimized instructions.
3155/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3156///
3157/// Therefore, except for the following four cases, other cases are regarded
3158/// as cross-lane shuffles, where optimization is relatively limited.
3159///
3160/// - Shuffle high, low lanes of two inputs vector
3161/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3162/// - Shuffle low, high lanes of two inputs vector
3163/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3164/// - Shuffle low, low lanes of two inputs vector
3165/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3166/// - Shuffle high, high lanes of two inputs vector
3167/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3168///
3169/// The first case is the closest to LoongArch instructions and the other
3170/// cases need to be converted to it for processing.
3171///
3172/// This function will return true for the last three cases above and will
3173/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3174/// cross-lane shuffle cases.
///
/// When true is returned, V1 (and V2, unless it is undef) have been replaced
/// by an XVPERMI of their v4i64 bitcast, and the affected mask halves have
/// been shifted by HalfSize so that the new mask has the shape of the first
/// case.
3176 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3177 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3178
3179 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3180
3181 int MaskSize = Mask.size();
3182 int HalfSize = Mask.size() / 2;
3183 MVT GRLenVT = Subtarget.getGRLenVT();
3184
3185 HalfMaskType preMask = None, postMask = None;
3186
 // Classify the first half of the mask: HighLaneTy when every defined index
 // refers to the first half of V1 or V2, LowLaneTy when every defined index
 // refers to the second half of V1 or V2, None otherwise.
3187 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3188 return M < 0 || (M >= 0 && M < HalfSize) ||
3189 (M >= MaskSize && M < MaskSize + HalfSize);
3190 }))
3191 preMask = HighLaneTy;
3192 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3193 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3194 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3195 }))
3196 preMask = LowLaneTy;
3197
 // Classify the second half of the mask with the same predicates; here the
 // LowLaneTy test is tried first.
3198 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3199 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3200 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3201 }))
3202 postMask = LowLaneTy;
3203 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3204 return M < 0 || (M >= 0 && M < HalfSize) ||
3205 (M >= MaskSize && M < MaskSize + HalfSize);
3206 }))
3207 postMask = HighLaneTy;
3208
3209 // The pre-half of mask is high lane type, and the post-half of mask
3210 // is low lane type, which is closest to the LoongArch instructions.
3211 //
3212 // Note: In the LoongArch architecture, the high lane of mask corresponds
3213 // to the lower 128-bit of vector register, and the low lane of mask
3214 // corresponds the higher 128-bit of vector register.
3215 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3216 return false;
3217 }
3218 if (preMask == LowLaneTy && postMask == HighLaneTy) {
 // NOTE(review): immediate 0b01001110 presumably swaps the two 128-bit
 // halves of the source -- confirm against the XVPERMI definition.
3219 V1 = DAG.getBitcast(MVT::v4i64, V1);
3220 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3221 DAG.getConstant(0b01001110, DL, GRLenVT));
3222 V1 = DAG.getBitcast(VT, V1);
3223
3224 if (!V2.isUndef()) {
3225 V2 = DAG.getBitcast(MVT::v4i64, V2);
3226 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3227 DAG.getConstant(0b01001110, DL, GRLenVT));
3228 V2 = DAG.getBitcast(VT, V2);
3229 }
3230
 // Re-target both mask halves at the permuted sources: defined indices in
 // the first half move down by HalfSize, those in the second half move up.
3231 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3232 *it = *it < 0 ? *it : *it - HalfSize;
3233 }
3234 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3235 *it = *it < 0 ? *it : *it + HalfSize;
3236 }
3237 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
 // NOTE(review): immediate 0b11101110 presumably copies the upper 128-bit
 // half into both halves -- confirm against the XVPERMI definition.
3238 V1 = DAG.getBitcast(MVT::v4i64, V1);
3239 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3240 DAG.getConstant(0b11101110, DL, GRLenVT));
3241 V1 = DAG.getBitcast(VT, V1);
3242
3243 if (!V2.isUndef()) {
3244 V2 = DAG.getBitcast(MVT::v4i64, V2);
3245 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3246 DAG.getConstant(0b11101110, DL, GRLenVT));
3247 V2 = DAG.getBitcast(VT, V2);
3248 }
3249
 // Only the first half of the mask needs adjusting in this case.
3250 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3251 *it = *it < 0 ? *it : *it - HalfSize;
3252 }
3253 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
 // NOTE(review): immediate 0b01000100 presumably copies the lower 128-bit
 // half into both halves -- confirm against the XVPERMI definition.
3254 V1 = DAG.getBitcast(MVT::v4i64, V1);
3255 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3256 DAG.getConstant(0b01000100, DL, GRLenVT));
3257 V1 = DAG.getBitcast(VT, V1);
3258
3259 if (!V2.isUndef()) {
3260 V2 = DAG.getBitcast(MVT::v4i64, V2);
3261 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3262 DAG.getConstant(0b01000100, DL, GRLenVT));
3263 V2 = DAG.getBitcast(VT, V2);
3264 }
3265
 // Only the second half of the mask needs adjusting in this case.
3266 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3267 *it = *it < 0 ? *it : *it + HalfSize;
3268 }
3269 } else { // cross-lane
3270 return false;
3271 }
3272
3273 return true;
3274}
3275
3276/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3277/// Only for 256-bit vector.
3278///
3279/// For example:
3280/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3281/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3282/// is lowered to:
3283/// (XVPERMI $xr2, $xr0, 78)
3284/// (XVSHUF $xr1, $xr2, $xr0)
3285/// (XVORI $xr0, $xr1, 0)
///
/// Only V1 is read by the body; V2 is not referenced. The result is a new
/// two-input shuffle of V1 and a copy of V1 with its two 128-bit halves
/// exchanged, using a mask in which no element crosses a lane. An empty
/// SDValue is returned when no mask element crosses a lane.
3287 ArrayRef<int> Mask,
3288 MVT VT, SDValue V1,
3289 SDValue V2,
3290 SelectionDAG &DAG) {
3291 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3292 int Size = Mask.size();
3293 int LaneSize = Size / 2;
3294
 // LaneCrossing[L] records that some element sourced from lane L is placed
 // into the other lane.
3295 bool LaneCrossing[2] = {false, false};
3296 for (int i = 0; i < Size; ++i)
3297 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3298 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3299
3300 // Bail out when no element crosses a lane; there is nothing to permute.
3301 if (!LaneCrossing[0] && !LaneCrossing[1])
3302 return SDValue();
3303
 // Redirect every lane-crossing element to the same in-lane position of the
 // second shuffle operand (indices >= Size), which holds the flipped vector.
3304 SmallVector<int> InLaneMask;
3305 InLaneMask.assign(Mask.begin(), Mask.end());
3306 for (int i = 0; i < Size; ++i) {
3307 int &M = InLaneMask[i];
3308 if (M < 0)
3309 continue;
3310 if (((M % Size) / LaneSize) != (i / LaneSize))
3311 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3312 }
3313
 // Build the flipped copy of V1 by exchanging its two 128-bit halves through
 // a v4i64 shuffle.
3314 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3315 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3316 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3317 Flipped = DAG.getBitcast(VT, Flipped);
3318 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3319}
3320
3321/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3322///
3323/// This routine breaks down the specific type of 256-bit shuffle and
3324/// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first non-empty
/// result is returned. If none matches, the mask is canonicalized by lane and,
/// when that changes the operands, the routine calls itself again with the
/// new mask. An empty SDValue is returned when every attempt fails.
3326 SDValue V1, SDValue V2, SelectionDAG &DAG,
3327 const LoongArchSubtarget &Subtarget) {
3328 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3329 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3330 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3331 "Vector type is unsupported for lasx!");
3333 "Two operands have different types!");
3334 assert(VT.getVectorNumElements() == Mask.size() &&
3335 "Unexpected mask size for shuffle!");
3336 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3337 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3338
 // Zeroable marks the result elements that are known to be undef or zero; it
 // is only consumed by the shift-based lowering below.
3339 APInt KnownUndef, KnownZero;
3340 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3341 APInt Zeroable = KnownUndef | KnownZero;
3342
3343 SDValue Result;
3344 // TODO: Add more comparison patterns.
 // Single-input shuffles get a first round of dedicated patterns.
3345 if (V2.isUndef()) {
3346 if ((Result =
3347 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3348 return Result;
3349 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3350 Subtarget)))
3351 return Result;
3352 // Try to widen vectors to gain more optimization opportunities.
3353 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3354 return NewShuffle;
3355 if ((Result =
3356 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3357 return Result;
3358 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3359 return Result;
3360 if ((Result =
3361 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3362 return Result;
3363
3364 // TODO: This comment may be enabled in the future to better match the
3365 // pattern for instruction selection.
3366 /* V2 = V1; */
3367 }
3368
3369 // It is recommended not to change the pattern comparison order for better
3370 // performance.
3371 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3372 return Result;
3373 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3374 return Result;
3375 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3376 return Result;
3377 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3378 return Result;
3379 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3380 return Result;
3381 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3382 return Result;
 // XVSHUF4I is attempted here only for the 64-bit element types.
3383 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3384 (Result =
3385 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3386 return Result;
3387 if ((Result =
3388 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3389 return Result;
3390 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3391 Zeroable)))
3392 return Result;
3393 if ((Result =
3394 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3395 return Result;
3396 if ((Result =
3397 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3398 return Result;
3399 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3400 Subtarget)))
3401 return Result;
3402
3403 // canonicalize non cross-lane shuffle vector
 // On success V1, V2 and NewMask have been rewritten, so restart the whole
 // dispatch on the canonical form.
3404 SmallVector<int> NewMask(Mask);
3405 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3406 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3407
3408 // FIXME: Handling the remaining cases earlier can degrade performance
3409 // in some situations. Further analysis is required to enable more
3410 // effective optimizations.
3411 if (V2.isUndef()) {
3412 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3413 V1, V2, DAG)))
3414 return Result;
3415 }
3416
 // Last resorts: widen the element type, then fall back to the generic
 // XVSHUF lowering.
3417 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3418 return NewShuffle;
3419 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3420 return Result;
3421
3422 return SDValue();
3423}
3424
3425SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3426 SelectionDAG &DAG) const {
3427 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3428 ArrayRef<int> OrigMask = SVOp->getMask();
3429 SDValue V1 = Op.getOperand(0);
3430 SDValue V2 = Op.getOperand(1);
3431 MVT VT = Op.getSimpleValueType();
3432 int NumElements = VT.getVectorNumElements();
3433 SDLoc DL(Op);
3434
3435 bool V1IsUndef = V1.isUndef();
3436 bool V2IsUndef = V2.isUndef();
3437 if (V1IsUndef && V2IsUndef)
3438 return DAG.getUNDEF(VT);
3439
3440 // When we create a shuffle node we put the UNDEF node to second operand,
3441 // but in some cases the first operand may be transformed to UNDEF.
3442 // In this case we should just commute the node.
3443 if (V1IsUndef)
3444 return DAG.getCommutedVectorShuffle(*SVOp);
3445
3446 // Check for non-undef masks pointing at an undef vector and make the masks
3447 // undef as well. This makes it easier to match the shuffle based solely on
3448 // the mask.
3449 if (V2IsUndef &&
3450 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3451 SmallVector<int, 8> NewMask(OrigMask);
3452 for (int &M : NewMask)
3453 if (M >= NumElements)
3454 M = -1;
3455 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3456 }
3457
3458 // Check for illegal shuffle mask element index values.
3459 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3460 (void)MaskUpperLimit;
3461 assert(llvm::all_of(OrigMask,
3462 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3463 "Out of bounds shuffle index");
3464
3465 // For each vector width, delegate to a specialized lowering routine.
3466 if (VT.is128BitVector())
3467 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3468
3469 if (VT.is256BitVector())
3470 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3471
3472 return SDValue();
3473}
3474
3475SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3476 SelectionDAG &DAG) const {
3477 // Custom lower to ensure the libcall return is passed in an FPR on hard
3478 // float ABIs.
3479 SDLoc DL(Op);
3480 MakeLibCallOptions CallOptions;
3481 SDValue Op0 = Op.getOperand(0);
3482 SDValue Chain = SDValue();
3483 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3484 SDValue Res;
3485 std::tie(Res, Chain) =
3486 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3487 if (Subtarget.is64Bit())
3488 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3489 return DAG.getBitcast(MVT::i32, Res);
3490}
3491
3492SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3493 SelectionDAG &DAG) const {
3494 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3495 // float ABIs.
3496 SDLoc DL(Op);
3497 MakeLibCallOptions CallOptions;
3498 SDValue Op0 = Op.getOperand(0);
3499 SDValue Chain = SDValue();
3500 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3501 DL, MVT::f32, Op0)
3502 : DAG.getBitcast(MVT::f32, Op0);
3503 SDValue Res;
3504 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3505 CallOptions, DL, Chain);
3506 return Res;
3507}
3508
3509SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3510 SelectionDAG &DAG) const {
3511 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3512 SDLoc DL(Op);
3513 MakeLibCallOptions CallOptions;
3514 RTLIB::Libcall LC =
3515 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3516 SDValue Res =
3517 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3518 if (Subtarget.is64Bit())
3519 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3520 return DAG.getBitcast(MVT::i32, Res);
3521}
3522
3523SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3526 MVT VT = Op.getSimpleValueType();
3527 SDLoc DL(Op);
3528 Op = DAG.getNode(
3529 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3530 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3531 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3532 DL, MVT::f32, Op)
3533 : DAG.getBitcast(MVT::f32, Op);
3534 if (VT != MVT::f32)
3535 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3536 return Res;
3537}
3538
3539// Lower BUILD_VECTOR as broadcast load (if possible).
3540// For example:
3541// %a = load i8, ptr %ptr
3542// %b = build_vector %a, %a, %a, %a
3543// is lowered to :
3544// (VLDREPL_B $a0, 0)
//
// The transformation applies only when every operand of the BUILD_VECTOR is
// the same LOAD value, the BUILD_VECTOR is the only user of that load, the
// load is a non-extending or any-extending load, and the loaded memory type
// has the same scalar width as the vector element type. Otherwise an empty
// SDValue is returned.
3546 const SDLoc &DL,
3547 SelectionDAG &DAG) {
3548 MVT VT = BVOp->getSimpleValueType(0);
3549 int NumOps = BVOp->getNumOperands();
3550
3551 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3552 "Unsupported vector type for broadcast.");
3553
3554 SDValue IdentitySrc;
 // Stays true while every operand seen so far is the same load.
3555 bool IsIdeneity = true;
3556
3557 for (int i = 0; i != NumOps; i++) {
3558 SDValue Op = BVOp->getOperand(i);
3559 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3560 IsIdeneity = false;
3561 break;
3562 }
 // Operand 0 is the reference every later operand is compared against.
3563 IdentitySrc = BVOp->getOperand(0);
3564 }
3565
3566 // Make sure that this load is valid and only has one user.
3567 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3568 return SDValue();
3569
3570 auto *LN = cast<LoadSDNode>(IdentitySrc);
3571 auto ExtType = LN->getExtensionType();
3572
3573 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3574 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3575 // Indexed loads and stores are not supported on LoongArch.
3576 assert(LN->isUnindexed() && "Unexpected indexed load.");
3577
3578 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3579 // The offset operand of unindexed load is always undefined, so there is
3580 // no need to pass it to VLDREPL.
3581 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3582 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
 // Redirect users of the original load's chain result to the new node's
 // chain result.
3583 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3584 return BCast;
3585 }
3586 return SDValue();
3587}
3588
3589// Sequentially insert elements from Ops into Vector, from low to high indices.
3590// Note: Ops can have fewer elements than Vector.
//
// Vector is updated in place. Undef elements of Ops are skipped, so the
// corresponding positions keep whatever Vector held on entry. Ops[0] is read
// unconditionally, so Ops must not be empty.
3592 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3593 EVT ResTy) {
3594 assert(Ops.size() <= ResTy.getVectorNumElements());
3595
 // Element 0 is placed with SCALAR_TO_VECTOR, which replaces the incoming
 // value of Vector instead of inserting into it.
3596 SDValue Op0 = Ops[0];
3597 if (!Op0.isUndef())
3598 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
 // The remaining elements are inserted one at a time at their own index.
3599 for (unsigned i = 1; i < Ops.size(); ++i) {
3600 SDValue Opi = Ops[i];
3601 if (Opi.isUndef())
3602 continue;
3603 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3604 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3605 }
3606}
3607
3608// Build a ResTy subvector from Node, taking NumElts elements starting at index
3609// 'first'.
//
// NumElts is the element count of ResTy. The selected operands of Node are
// copied and inserted into an initially undef ResTy vector via fillVector,
// and the filled vector is returned.
3611 SelectionDAG &DAG, SDLoc DL,
3612 const LoongArchSubtarget &Subtarget,
3613 EVT ResTy, unsigned first) {
3614 unsigned NumElts = ResTy.getVectorNumElements();
3615
 // The requested window must lie inside Node's operand list.
3616 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3617
3618 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3619 Node->op_begin() + first + NumElts);
3620 SDValue Vector = DAG.getUNDEF(ResTy);
3621 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3622 return Vector;
3623}
3624
/// Custom-lower a BUILD_VECTOR node for 128-bit (LSX) and 256-bit (LASX)
/// result types.
///
/// The strategies are tried in this order:
///  1. a broadcast load when every operand is the same load;
///  2. a constant splat of 8, 16, 32 or 64 bits, emitted as a constant of the
///     matching integer vector type and bitcast to the result type;
///  3. a non-constant splat, which is returned unchanged;
///  4. a splat of the single distinct constant, followed by one
///     INSERT_VECTOR_ELT per non-constant operand;
///  5. for vectors without any constant operand: a repeated sub-sequence that
///     is filled once and then broadcast, or element-wise insertion (done per
///     128-bit half for 256-bit vectors).
/// An empty SDValue is returned when none of these applies.
3625SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3626 SelectionDAG &DAG) const {
3627 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3628 MVT VT = Node->getSimpleValueType(0);
3629 EVT ResTy = Op->getValueType(0);
3630 unsigned NumElts = ResTy.getVectorNumElements();
3631 SDLoc DL(Op);
3632 APInt SplatValue, SplatUndef;
3633 unsigned SplatBitSize;
3634 bool HasAnyUndefs;
3635 bool IsConstant = false;
3636 bool UseSameConstant = true;
3637 SDValue ConstantValue;
3638 bool Is128Vec = ResTy.is128BitVector();
3639 bool Is256Vec = ResTy.is256BitVector();
3640
 // Only 128-bit vectors with LSX or 256-bit vectors with LASX are handled.
3641 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3642 (!Subtarget.hasExtLASX() || !Is256Vec))
3643 return SDValue();
3644
3645 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3646 return Result;
3647
3648 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3649 /*MinSplatBits=*/8) &&
3650 SplatBitSize <= 64) {
3651 // We can only cope with 8, 16, 32, or 64-bit elements.
3652 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3653 SplatBitSize != 64)
3654 return SDValue();
3655
3656 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3657 // We can only handle 64-bit elements that are within
3658 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3659 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3660 if (!SplatValue.isSignedIntN(10) &&
3661 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3662 return SDValue();
3663 if ((Is128Vec && ResTy == MVT::v4i32) ||
3664 (Is256Vec && ResTy == MVT::v8i32))
3665 return Op;
3666 }
3667
3668 EVT ViaVecTy;
3669
 // Pick the integer vector type whose element width equals the splat width.
3670 switch (SplatBitSize) {
3671 default:
3672 return SDValue();
3673 case 8:
3674 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3675 break;
3676 case 16:
3677 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3678 break;
3679 case 32:
3680 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3681 break;
3682 case 64:
3683 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3684 break;
3685 }
3686
3687 // SelectionDAG::getConstant will promote SplatValue appropriately.
3688 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3689
3690 // Bitcast to the type we originally wanted.
3691 if (ViaVecTy != ResTy)
3692 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3693
3694 return Result;
3695 }
3696
 // A splat of a non-constant value is kept as is.
3697 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3698 return Op;
3699
 // Record whether any operand is a constant and whether all constant
 // operands are the same value.
3700 for (unsigned i = 0; i < NumElts; ++i) {
3701 SDValue Opi = Node->getOperand(i);
3702 if (isIntOrFPConstant(Opi)) {
3703 IsConstant = true;
3704 if (!ConstantValue.getNode())
3705 ConstantValue = Opi;
3706 else if (ConstantValue != Opi)
3707 UseSameConstant = false;
3708 }
3709 }
3710
3711 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3712 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
 // Splat the shared constant, then overwrite the non-constant positions.
3713 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3714 for (unsigned i = 0; i < NumElts; ++i) {
3715 SDValue Opi = Node->getOperand(i);
3716 if (!isIntOrFPConstant(Opi))
3717 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3718 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3719 }
3720 return Result;
3721 }
3722
3723 if (!IsConstant) {
3724 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3725 // the sub-sequence of the vector and then broadcast the sub-sequence.
3726 //
3727 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3728 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3729 // generates worse code in some cases. This could be further optimized
3730 // with more consideration.
3732 BitVector UndefElements;
3733 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3734 UndefElements.count() == 0) {
3735 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3736 // because the high part can be simply treated as undef.
3737 SDValue Vector = DAG.getUNDEF(ResTy);
3738 EVT FillTy = Is256Vec
3740 : ResTy;
3741 SDValue FillVec =
3742 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3743
3744 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3745
 // Treat the whole sub-sequence as one wide integer element and splat it.
3746 unsigned SeqLen = Sequence.size();
3747 unsigned SplatLen = NumElts / SeqLen;
3748 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3749 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3750
3751 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3752 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3753 if (SplatEltTy == MVT::i128)
3754 SplatTy = MVT::v4i64;
3755
3756 SDValue SplatVec;
3757 SDValue SrcVec = DAG.getBitcast(
3758 SplatTy,
3759 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3760 if (Is256Vec) {
3761 SplatVec =
3762 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3763 : LoongArchISD::XVREPLVE0,
3764 DL, SplatTy, SrcVec);
3765 } else {
3766 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3767 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3768 }
3769
3770 return DAG.getBitcast(ResTy, SplatVec);
3771 }
3772
3773 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3774 // using memory operations is much slower.
3775 //
3776 // For 256-bit vectors, normally split into two halves and concatenate.
3777 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3778 // one non-undef element, skip splitting to avoid a worse result.
3779 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3780 ResTy == MVT::v4f64) {
 // Count the non-undef operands of the upper half, stopping at two.
3781 unsigned NonUndefCount = 0;
3782 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3783 if (!Node->getOperand(i).isUndef()) {
3784 ++NonUndefCount;
3785 if (NonUndefCount > 1)
3786 break;
3787 }
3788 }
3789 if (NonUndefCount == 1)
3790 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3791 }
3792
 // Fill the whole 128-bit vector, or the low half of a 256-bit vector.
3793 EVT VecTy =
3794 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3795 SDValue Vector =
3796 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3797
3798 if (Is128Vec)
3799 return Vector;
3800
 // Fill the high half separately and join the two halves.
3801 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3802 VecTy, NumElts / 2);
3803
3804 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3805 }
3806
3807 return SDValue();
3808}
3809
3810SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3811 SelectionDAG &DAG) const {
3812 SDLoc DL(Op);
3813 MVT ResVT = Op.getSimpleValueType();
3814 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3815
3816 unsigned NumOperands = Op.getNumOperands();
3817 unsigned NumFreezeUndef = 0;
3818 unsigned NumZero = 0;
3819 unsigned NumNonZero = 0;
3820 unsigned NonZeros = 0;
3821 SmallSet<SDValue, 4> Undefs;
3822 for (unsigned i = 0; i != NumOperands; ++i) {
3823 SDValue SubVec = Op.getOperand(i);
3824 if (SubVec.isUndef())
3825 continue;
3826 if (ISD::isFreezeUndef(SubVec.getNode())) {
3827 // If the freeze(undef) has multiple uses then we must fold to zero.
3828 if (SubVec.hasOneUse()) {
3829 ++NumFreezeUndef;
3830 } else {
3831 ++NumZero;
3832 Undefs.insert(SubVec);
3833 }
3834 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3835 ++NumZero;
3836 else {
3837 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3838 NonZeros |= 1 << i;
3839 ++NumNonZero;
3840 }
3841 }
3842
3843 // If we have more than 2 non-zeros, build each half separately.
3844 if (NumNonZero > 2) {
3845 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3846 ArrayRef<SDUse> Ops = Op->ops();
3847 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3848 Ops.slice(0, NumOperands / 2));
3849 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3850 Ops.slice(NumOperands / 2));
3851 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3852 }
3853
3854 // Otherwise, build it up through insert_subvectors.
3855 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3856 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3857 : DAG.getUNDEF(ResVT));
3858
3859 // Replace Undef operands with ZeroVector.
3860 for (SDValue U : Undefs)
3861 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3862
3863 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3864 unsigned NumSubElems = SubVT.getVectorNumElements();
3865 for (unsigned i = 0; i != NumOperands; ++i) {
3866 if ((NonZeros & (1 << i)) == 0)
3867 continue;
3868
3869 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3870 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3871 }
3872
3873 return Vec;
3874}
3875
3876SDValue
3877LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3878 SelectionDAG &DAG) const {
3879 MVT EltVT = Op.getSimpleValueType();
3880 SDValue Vec = Op->getOperand(0);
3881 EVT VecTy = Vec->getValueType(0);
3882 SDValue Idx = Op->getOperand(1);
3883 SDLoc DL(Op);
3884 MVT GRLenVT = Subtarget.getGRLenVT();
3885
3886 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3887
3888 if (isa<ConstantSDNode>(Idx))
3889 return Op;
3890
3891 switch (VecTy.getSimpleVT().SimpleTy) {
3892 default:
3893 llvm_unreachable("Unexpected type");
3894 case MVT::v32i8:
3895 case MVT::v16i16:
3896 case MVT::v4i64:
3897 case MVT::v4f64: {
3898 // Extract the high half subvector and place it to the low half of a new
3899 // vector. It doesn't matter what the high half of the new vector is.
3900 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3901 SDValue VecHi =
3902 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3903 SDValue TmpVec =
3904 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3905 VecHi, DAG.getConstant(0, DL, GRLenVT));
3906
3907 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3908 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3909 // desired element.
3910 SDValue IdxCp =
3911 Subtarget.is64Bit()
3912 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3913 : DAG.getBitcast(MVT::f32, Idx);
3914 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3915 SDValue MaskVec =
3916 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3917 SDValue ResVec =
3918 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3919
3920 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3921 DAG.getConstant(0, DL, GRLenVT));
3922 }
3923 case MVT::v8i32:
3924 case MVT::v8f32: {
3925 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3926 SDValue SplatValue =
3927 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3928
3929 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3930 DAG.getConstant(0, DL, GRLenVT));
3931 }
3932 }
3933}
3934
3935SDValue
3936LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3937 SelectionDAG &DAG) const {
3938 MVT VT = Op.getSimpleValueType();
3939 MVT EltVT = VT.getVectorElementType();
3940 unsigned NumElts = VT.getVectorNumElements();
3941 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3942 SDLoc DL(Op);
3943 SDValue Op0 = Op.getOperand(0);
3944 SDValue Op1 = Op.getOperand(1);
3945 SDValue Op2 = Op.getOperand(2);
3946
3947 if (isa<ConstantSDNode>(Op2))
3948 return Op;
3949
3950 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3951 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3952
3953 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3954 return SDValue();
3955
3956 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3957 SmallVector<SDValue, 32> RawIndices;
3958 SDValue SplatIdx;
3959 SDValue Indices;
3960
3961 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3962 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3963 for (unsigned i = 0; i < NumElts; ++i) {
3964 RawIndices.push_back(Op2);
3965 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3966 }
3967 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3968 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3969
3970 RawIndices.clear();
3971 for (unsigned i = 0; i < NumElts; ++i) {
3972 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3973 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3974 }
3975 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3976 Indices = DAG.getBitcast(IdxVTy, Indices);
3977 } else {
3978 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3979
3980 for (unsigned i = 0; i < NumElts; ++i)
3981 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3982 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3983 }
3984
3985 // insert vec, elt, idx
3986 // =>
3987 // select (splatidx == {0,1,2...}) ? splatelt : vec
3988 SDValue SelectCC =
3989 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3990 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3991}
3992
3993SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3994 SelectionDAG &DAG) const {
3995 SDLoc DL(Op);
3996 SyncScope::ID FenceSSID =
3997 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3998
3999 // singlethread fences only synchronize with signal handlers on the same
4000 // thread and thus only need to preserve instruction order, not actually
4001 // enforce memory ordering.
4002 if (FenceSSID == SyncScope::SingleThread)
4003 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4004 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4005
4006 return Op;
4007}
4008
4009SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4010 SelectionDAG &DAG) const {
4011
4012 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
4013 DAG.getContext()->emitError(
4014 "On LA64, only 64-bit registers can be written.");
4015 return Op.getOperand(0);
4016 }
4017
4018 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
4019 DAG.getContext()->emitError(
4020 "On LA32, only 32-bit registers can be written.");
4021 return Op.getOperand(0);
4022 }
4023
4024 return Op;
4025}
4026
// Custom lowering for FRAMEADDR nodes.
//
// Operand 0 is the requested frame depth and must be a constant; otherwise an
// error is emitted and an empty SDValue is returned. The result starts as a
// copy of the frame register and, for every level of depth, is replaced by the
// value loaded from (current frame address - 2 * GRLen/8).
//
// NOTE(review): embedded listing number 4036 is absent from this extract, so a
// statement may be missing between the definitions of MF and FrameReg --
// confirm against the upstream file.
4027SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4028 SelectionDAG &DAG) const {
4029 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4030 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
4031 "be a constant integer");
4032 return SDValue();
4033 }
4034
4035 MachineFunction &MF = DAG.getMachineFunction();
4037 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4038 EVT VT = Op.getValueType();
4039 SDLoc DL(Op);
4040 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4041 unsigned Depth = Op.getConstantOperandVal(0);
4042 int GRLenInBytes = Subtarget.getGRLen() / 8;
4043
// Walk up Depth frames: each step loads the next frame address from the slot
// two GRLen-sized words below the current one.
4044 while (Depth--) {
4045 int Offset = -(GRLenInBytes * 2);
4046 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4047 DAG.getSignedConstant(Offset, DL, VT));
4048 FrameAddr =
4049 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4050 }
4051 return FrameAddr;
4052}
4053
// Custom lowering for RETURNADDR nodes.
//
// Only depth 0 (operand 0 == 0) is supported; any other depth emits an error
// and returns an empty SDValue. For depth 0 the return-address register is
// registered as a function live-in and its value is copied out as a GRLen-wide
// integer.
//
// NOTE(review): embedded listing number 4064 is absent from this extract, so a
// statement may be missing between the definitions of MF and GRLenVT --
// confirm against the upstream file.
4054SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4055 SelectionDAG &DAG) const {
4056 // Currently only support lowering return address for current frame.
4057 if (Op.getConstantOperandVal(0) != 0) {
4058 DAG.getContext()->emitError(
4059 "return address can only be determined for the current frame");
4060 return SDValue();
4061 }
4062
4063 MachineFunction &MF = DAG.getMachineFunction();
4065 MVT GRLenVT = Subtarget.getGRLenVT();
4066
4067 // Return the value of the return address register, marking it an implicit
4068 // live-in.
4069 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4070 getRegClassFor(GRLenVT));
4071 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4072}
4073
4074SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4075 SelectionDAG &DAG) const {
4076 MachineFunction &MF = DAG.getMachineFunction();
4077 auto Size = Subtarget.getGRLen() / 8;
4078 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4079 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4080}
4081
4082SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4083 SelectionDAG &DAG) const {
4084 MachineFunction &MF = DAG.getMachineFunction();
4085 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4086
4087 SDLoc DL(Op);
4088 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4090
4091 // vastart just stores the address of the VarArgsFrameIndex slot into the
4092 // memory location argument.
4093 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4094 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4095 MachinePointerInfo(SV));
4096}
4097
4098SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4099 SelectionDAG &DAG) const {
4100 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4101 !Subtarget.hasBasicD() && "unexpected target features");
4102
4103 SDLoc DL(Op);
4104 SDValue Op0 = Op.getOperand(0);
4105 if (Op0->getOpcode() == ISD::AND) {
4106 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4107 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4108 return Op;
4109 }
4110
4111 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4112 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
4113 Op0.getConstantOperandVal(2) == UINT64_C(0))
4114 return Op;
4115
4116 if (Op0.getOpcode() == ISD::AssertZext &&
4117 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4118 return Op;
4119
4120 EVT OpVT = Op0.getValueType();
4121 EVT RetVT = Op.getValueType();
4122 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4123 MakeLibCallOptions CallOptions;
4124 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4125 SDValue Chain = SDValue();
4127 std::tie(Result, Chain) =
4128 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4129 return Result;
4130}
4131
// Custom lowering for SINT_TO_FP on LA64 with basic F but without basic D
// (asserted below).
//
// When the source operand satisfies the opcode/type test below, the node is
// returned unchanged; otherwise the conversion goes through the
// RTLIB::getSINTTOFP libcall.
//
// NOTE(review): embedded listing numbers 4141 and 4151 are absent from this
// extract. The first falls inside the parenthesized opcode test (its second
// alternative and the closing parenthesis are not visible); the second
// presumably declared Result -- confirm both against the upstream file.
4132SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4133 SelectionDAG &DAG) const {
4134 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4135 !Subtarget.hasBasicD() && "unexpected target features");
4136
4137 SDLoc DL(Op);
4138 SDValue Op0 = Op.getOperand(0);
4139
4140 if ((Op0.getOpcode() == ISD::AssertSext ||
4142 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4143 return Op;
4144
4145 EVT OpVT = Op0.getValueType();
4146 EVT RetVT = Op.getValueType();
4147 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4148 MakeLibCallOptions CallOptions;
4149 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4150 SDValue Chain = SDValue();
4152 std::tie(Result, Chain) =
4153 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4154 return Result;
4155}
4156
4157SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4158 SelectionDAG &DAG) const {
4159
4160 SDLoc DL(Op);
4161 EVT VT = Op.getValueType();
4162 SDValue Op0 = Op.getOperand(0);
4163 EVT Op0VT = Op0.getValueType();
4164
4165 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4166 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4167 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4168 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4169 }
4170 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4171 SDValue Lo, Hi;
4172 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4173 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4174 }
4175 return Op;
4176}
4177
4178SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4179 SelectionDAG &DAG) const {
4180
4181 SDLoc DL(Op);
4182 SDValue Op0 = Op.getOperand(0);
4183
4184 if (Op0.getValueType() == MVT::f16)
4185 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4186
4187 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4188 !Subtarget.hasBasicD()) {
4189 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4190 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4191 }
4192
4193 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4194 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4195 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4196}
4197
// Wraps N's global value in a target global address node of type Ty with
// offset 0 and target flags Flags.
// NOTE(review): the first signature line (embedded listing number 4198) is
// absent from this extract; presumably this is one of the getTargetNode
// overloads called from getAddr -- confirm against the upstream file.
4199 SelectionDAG &DAG, unsigned Flags) {
4200 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4201}
4202
// Wraps N's block address (keeping N's offset) in a target block address node
// of type Ty with target flags Flags.
// NOTE(review): the first signature line (embedded listing number 4203) is
// absent from this extract; presumably this is one of the getTargetNode
// overloads called from getAddr -- confirm against the upstream file.
4204 SelectionDAG &DAG, unsigned Flags) {
4205 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4206 Flags);
4207}
4208
// Wraps N's constant (keeping N's alignment and offset) in a target constant
// pool node of type Ty with target flags Flags.
// NOTE(review): the first signature line (embedded listing number 4209) is
// absent from this extract; presumably this is one of the getTargetNode
// overloads called from getAddr -- confirm against the upstream file.
4210 SelectionDAG &DAG, unsigned Flags) {
4211 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4212 N->getOffset(), Flags);
4213}
4214
// Wraps N's jump table index in a target jump table node of type Ty with
// target flags Flags.
// NOTE(review): the first signature line (embedded listing number 4215) is
// absent from this extract; presumably this is one of the getTargetNode
// overloads called from getAddr -- confirm against the upstream file.
4216 SelectionDAG &DAG, unsigned Flags) {
4217 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4218}
4219
// Builds the machine node that materializes the address of the symbol
// described by N.
//
// IsLocal selects a PC-relative pseudo (PseudoLA_PCREL*) over a GOT-based one
// (PseudoLA_GOT*). The code model M selects between the plain pseudos
// (Small/Medium) and the *_LARGE pseudos (Large, asserted to be LA64), which
// take an extra placeholder operand. Any other code model is a fatal error.
// GOT-based nodes additionally get a memory operand attached.
//
// NOTE(review): embedded listing numbers 4222 and 4284-4286 are absent from
// this extract: presumably the parameter that declares M and the leading
// arguments of getMachineMemOperand -- confirm against the upstream file.
4220template <class NodeTy>
4221SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4223 bool IsLocal) const {
4224 SDLoc DL(N);
4225 EVT Ty = getPointerTy(DAG.getDataLayout());
4226 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4227 SDValue Load;
4228
4229 switch (M) {
4230 default:
4231 report_fatal_error("Unsupported code model");
4232
4233 case CodeModel::Large: {
4234 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4235
4236 // This is not actually used, but is necessary for successfully matching
4237 // the PseudoLA_*_LARGE nodes.
4238 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4239 if (IsLocal) {
4240 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4241 // eventually becomes the desired 5-insn code sequence.
4242 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4243 Tmp, Addr),
4244 0);
4245 } else {
4246 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4247 // eventually becomes the desired 5-insn code sequence.
4248 Load = SDValue(
4249 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4250 0);
4251 }
4252 break;
4253 }
4254
4255 case CodeModel::Small:
4256 case CodeModel::Medium:
4257 if (IsLocal) {
4258 // This generates the pattern (PseudoLA_PCREL sym), which
4259 //
4260 // for la32r expands to:
4261 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4262 //
4263 // for la32s and la64 expands to:
4264 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4265 Load = SDValue(
4266 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4267 } else {
4268 // This generates the pattern (PseudoLA_GOT sym), which
4269 //
4270 // for la32r expands to:
4271 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4272 //
4273 // for la32s and la64 expands to:
4274 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4275 Load =
4276 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4277 }
4278 }
4279
// Only the GOT-based (non-local) nodes get a memory operand attached.
4280 if (!IsLocal) {
4281 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4282 MachineFunction &MF = DAG.getMachineFunction();
4283 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4287 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4288 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4289 }
4290
4291 return Load;
4292}
4293
4294SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4295 SelectionDAG &DAG) const {
4296 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4297 DAG.getTarget().getCodeModel());
4298}
4299
4300SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4301 SelectionDAG &DAG) const {
4302 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4303 DAG.getTarget().getCodeModel());
4304}
4305
4306SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4307 SelectionDAG &DAG) const {
4308 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4309 DAG.getTarget().getCodeModel());
4310}
4311
4312SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4313 SelectionDAG &DAG) const {
4314 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4315 assert(N->getOffset() == 0 && "unexpected offset in global node");
4316 auto CM = DAG.getTarget().getCodeModel();
4317 const GlobalValue *GV = N->getGlobal();
4318
4319 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4320 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4321 CM = *GCM;
4322 }
4323
4324 return getAddr(N, DAG, CM, GV->isDSOLocal());
4325}
4326
// Builds the address of the TLS variable referenced by N using pseudo Opc.
//
// PseudoLA_TLS_IE_LARGE receives an extra placeholder operand. For
// PseudoLA_TLS_LE outside the large code model the pseudo's result is returned
// directly; in every other case the thread pointer (register R2) is added to
// it. When UseGOT is set the pseudo node also gets a memory operand attached.
//
// NOTE(review): embedded listing numbers 4354-4356 are absent from this
// extract, presumably the leading arguments of getMachineMemOperand --
// confirm against the upstream file.
4327SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4328 SelectionDAG &DAG,
4329 unsigned Opc, bool UseGOT,
4330 bool Large) const {
4331 SDLoc DL(N);
4332 EVT Ty = getPointerTy(DAG.getDataLayout());
4333 MVT GRLenVT = Subtarget.getGRLenVT();
4334
4335 // This is not actually used, but is necessary for successfully matching the
4336 // PseudoLA_*_LARGE nodes.
4337 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4338 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4339
4340 // Only IE needs an extra argument for large code model.
4341 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4342 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4343 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4344
4345 // If it is LE for normal/medium code model, the add tp operation will occur
4346 // during the pseudo-instruction expansion.
4347 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4348 return Offset;
4349
4350 if (UseGOT) {
4351 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4352 MachineFunction &MF = DAG.getMachineFunction();
4353 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4357 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4358 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4359 }
4360
4361 // Add the thread pointer.
4362 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4363 DAG.getRegister(LoongArch::R2, GRLenVT));
4364}
4365
4366SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4367 SelectionDAG &DAG,
4368 unsigned Opc,
4369 bool Large) const {
4370 SDLoc DL(N);
4371 EVT Ty = getPointerTy(DAG.getDataLayout());
4372 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4373
4374 // This is not actually used, but is necessary for successfully matching the
4375 // PseudoLA_*_LARGE nodes.
4376 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4377
4378 // Use a PC-relative addressing mode to access the dynamic GOT address.
4379 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4380 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4381 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4382
4383 // Prepare argument list to generate call.
4385 Args.emplace_back(Load, CallTy);
4386
4387 // Setup call to __tls_get_addr.
4388 TargetLowering::CallLoweringInfo CLI(DAG);
4389 CLI.setDebugLoc(DL)
4390 .setChain(DAG.getEntryNode())
4391 .setLibCallee(CallingConv::C, CallTy,
4392 DAG.getExternalSymbol("__tls_get_addr", Ty),
4393 std::move(Args));
4394
4395 return LowerCallTo(CLI).first;
4396}
4397
4398SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4399 SelectionDAG &DAG, unsigned Opc,
4400 bool Large) const {
4401 SDLoc DL(N);
4402 EVT Ty = getPointerTy(DAG.getDataLayout());
4403 const GlobalValue *GV = N->getGlobal();
4404
4405 // This is not actually used, but is necessary for successfully matching the
4406 // PseudoLA_*_LARGE nodes.
4407 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4408
4409 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4410 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4411 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4412 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4413 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4414}
4415
// Custom lowering for GlobalTLSAddress nodes.
//
// Dispatches on the TLS model the target machine selects for the global:
// the dynamic models call getDynamicTLSAddr unless TLS descriptors are
// enabled, the static models call getStaticTLSAddr, and whatever breaks out of
// the switch is lowered through getTLSDescAddr. The *_LARGE pseudos are chosen
// under the large code model (asserted to be LA64). Emulated TLS is rejected
// with a fatal usage error.
//
// NOTE(review): embedded listing numbers 4419-4420, 4435, 4445, 4454 and 4460
// are absent from this extract. Judging by the text that follows each gap they
// presumably held the condition guarding the GHC error and the four case
// labels of the switch -- confirm against the upstream file.
4416SDValue
4417LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4418 SelectionDAG &DAG) const {
4421 report_fatal_error("In GHC calling convention TLS is not supported");
4422
4423 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4424 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4425
4426 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4427 assert(N->getOffset() == 0 && "unexpected offset in global node");
4428
4429 if (DAG.getTarget().useEmulatedTLS())
4430 reportFatalUsageError("the emulated TLS is prohibited");
4431
4432 bool IsDesc = DAG.getTarget().useTLSDESC();
4433
4434 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4436 // In this model, application code calls the dynamic linker function
4437 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4438 // runtime.
4439 if (!IsDesc)
4440 return getDynamicTLSAddr(N, DAG,
4441 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4442 : LoongArch::PseudoLA_TLS_GD,
4443 Large);
4444 break;
4446 // Same as GeneralDynamic, except for assembly modifiers and relocation
4447 // records.
4448 if (!IsDesc)
4449 return getDynamicTLSAddr(N, DAG,
4450 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4451 : LoongArch::PseudoLA_TLS_LD,
4452 Large);
4453 break;
4455 // This model uses the GOT to resolve TLS offsets.
4456 return getStaticTLSAddr(N, DAG,
4457 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4458 : LoongArch::PseudoLA_TLS_IE,
4459 /*UseGOT=*/true, Large);
4461 // This model is used when static linking as the TLS offsets are resolved
4462 // during program linking.
4463 //
4464 // This node doesn't need an extra argument for the large code model.
4465 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4466 /*UseGOT=*/false, Large);
4467 }
4468
// Reached only through the `break`s above, i.e. when TLS descriptors are in
// use for a dynamic model.
4469 return getTLSDescAddr(N, DAG,
4470 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4471 : LoongArch::PseudoLA_TLS_DESC,
4472 Large);
4473}
4474
// Validates the immediate operand of an intrinsic node.
//
// Operand ImmOp of Op must be a constant (enforced by cast<>). It is accepted
// when it fits in N bits: as a signed value if IsSigned is set, as an unsigned
// value otherwise. On failure the error "<operation name>: argument out of
// range." is emitted and an UNDEF node of Op's value type is returned; on
// success an empty SDValue is returned.
//
// NOTE(review): the first signature line (embedded listing number 4476) is
// absent from this extract -- confirm the function name and leading parameters
// against the upstream file.
4475template <unsigned N>
4477 SelectionDAG &DAG, bool IsSigned = false) {
4478 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4479 // Check the ImmArg.
4480 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4481 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4482 DAG.getContext()->emitError(Op->getOperationName(0) +
4483 ": argument out of range.");
4484 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4485 }
4486 return SDValue();
4487}
4488
// Custom lowering for INTRINSIC_WO_CHAIN nodes; operand 0 is the intrinsic ID.
//
//   * thread_pointer becomes a read of register R2 in the pointer type.
//   * The listed LSX/LASX intrinsics only have their immediate operand
//     range-checked through checkIntrinsicImmArg<Bits>(Op, OperandIndex, DAG
//     [, IsSigned]). The case labels are grouped by immediate width (the
//     template argument) and by the index of the immediate operand.
//   * Every other intrinsic returns an empty SDValue.
4489SDValue
4490LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4491 SelectionDAG &DAG) const {
4492 switch (Op.getConstantOperandVal(0)) {
4493 default:
4494 return SDValue(); // Don't custom lower most intrinsics.
4495 case Intrinsic::thread_pointer: {
4496 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4497 return DAG.getRegister(LoongArch::R2, PtrVT);
4498 }
4499 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4500 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4501 case Intrinsic::loongarch_lsx_vreplvei_d:
4502 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4503 return checkIntrinsicImmArg<1>(Op, 2, DAG);
4504 case Intrinsic::loongarch_lsx_vreplvei_w:
4505 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4506 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4507 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4508 case Intrinsic::loongarch_lasx_xvpickve_d:
4509 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4510 return checkIntrinsicImmArg<2>(Op, 2, DAG);
4511 case Intrinsic::loongarch_lasx_xvinsve0_d:
4512 return checkIntrinsicImmArg<2>(Op, 3, DAG);
4513 case Intrinsic::loongarch_lsx_vsat_b:
4514 case Intrinsic::loongarch_lsx_vsat_bu:
4515 case Intrinsic::loongarch_lsx_vrotri_b:
4516 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4517 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4518 case Intrinsic::loongarch_lsx_vsrlri_b:
4519 case Intrinsic::loongarch_lsx_vsrari_b:
4520 case Intrinsic::loongarch_lsx_vreplvei_h:
4521 case Intrinsic::loongarch_lasx_xvsat_b:
4522 case Intrinsic::loongarch_lasx_xvsat_bu:
4523 case Intrinsic::loongarch_lasx_xvrotri_b:
4524 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4525 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4526 case Intrinsic::loongarch_lasx_xvsrlri_b:
4527 case Intrinsic::loongarch_lasx_xvsrari_b:
4528 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4529 case Intrinsic::loongarch_lasx_xvpickve_w:
4530 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4531 return checkIntrinsicImmArg<3>(Op, 2, DAG);
4532 case Intrinsic::loongarch_lasx_xvinsve0_w:
4533 return checkIntrinsicImmArg<3>(Op, 3, DAG);
4534 case Intrinsic::loongarch_lsx_vsat_h:
4535 case Intrinsic::loongarch_lsx_vsat_hu:
4536 case Intrinsic::loongarch_lsx_vrotri_h:
4537 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4538 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4539 case Intrinsic::loongarch_lsx_vsrlri_h:
4540 case Intrinsic::loongarch_lsx_vsrari_h:
4541 case Intrinsic::loongarch_lsx_vreplvei_b:
4542 case Intrinsic::loongarch_lasx_xvsat_h:
4543 case Intrinsic::loongarch_lasx_xvsat_hu:
4544 case Intrinsic::loongarch_lasx_xvrotri_h:
4545 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4546 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4547 case Intrinsic::loongarch_lasx_xvsrlri_h:
4548 case Intrinsic::loongarch_lasx_xvsrari_h:
4549 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4550 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4551 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4552 case Intrinsic::loongarch_lsx_vsrani_b_h:
4553 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4554 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4555 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4556 case Intrinsic::loongarch_lsx_vssrani_b_h:
4557 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4558 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4559 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4560 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4561 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4562 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4563 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4564 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4565 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4566 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4567 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4568 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4569 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4570 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4571 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4572 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4573 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4574 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4575 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4576 case Intrinsic::loongarch_lsx_vsat_w:
4577 case Intrinsic::loongarch_lsx_vsat_wu:
4578 case Intrinsic::loongarch_lsx_vrotri_w:
4579 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4580 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4581 case Intrinsic::loongarch_lsx_vsrlri_w:
4582 case Intrinsic::loongarch_lsx_vsrari_w:
4583 case Intrinsic::loongarch_lsx_vslei_bu:
4584 case Intrinsic::loongarch_lsx_vslei_hu:
4585 case Intrinsic::loongarch_lsx_vslei_wu:
4586 case Intrinsic::loongarch_lsx_vslei_du:
4587 case Intrinsic::loongarch_lsx_vslti_bu:
4588 case Intrinsic::loongarch_lsx_vslti_hu:
4589 case Intrinsic::loongarch_lsx_vslti_wu:
4590 case Intrinsic::loongarch_lsx_vslti_du:
4591 case Intrinsic::loongarch_lsx_vbsll_v:
4592 case Intrinsic::loongarch_lsx_vbsrl_v:
4593 case Intrinsic::loongarch_lasx_xvsat_w:
4594 case Intrinsic::loongarch_lasx_xvsat_wu:
4595 case Intrinsic::loongarch_lasx_xvrotri_w:
4596 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4597 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4598 case Intrinsic::loongarch_lasx_xvsrlri_w:
4599 case Intrinsic::loongarch_lasx_xvsrari_w:
4600 case Intrinsic::loongarch_lasx_xvslei_bu:
4601 case Intrinsic::loongarch_lasx_xvslei_hu:
4602 case Intrinsic::loongarch_lasx_xvslei_wu:
4603 case Intrinsic::loongarch_lasx_xvslei_du:
4604 case Intrinsic::loongarch_lasx_xvslti_bu:
4605 case Intrinsic::loongarch_lasx_xvslti_hu:
4606 case Intrinsic::loongarch_lasx_xvslti_wu:
4607 case Intrinsic::loongarch_lasx_xvslti_du:
4608 case Intrinsic::loongarch_lasx_xvbsll_v:
4609 case Intrinsic::loongarch_lasx_xvbsrl_v:
4610 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4611 case Intrinsic::loongarch_lsx_vseqi_b:
4612 case Intrinsic::loongarch_lsx_vseqi_h:
4613 case Intrinsic::loongarch_lsx_vseqi_w:
4614 case Intrinsic::loongarch_lsx_vseqi_d:
4615 case Intrinsic::loongarch_lsx_vslei_b:
4616 case Intrinsic::loongarch_lsx_vslei_h:
4617 case Intrinsic::loongarch_lsx_vslei_w:
4618 case Intrinsic::loongarch_lsx_vslei_d:
4619 case Intrinsic::loongarch_lsx_vslti_b:
4620 case Intrinsic::loongarch_lsx_vslti_h:
4621 case Intrinsic::loongarch_lsx_vslti_w:
4622 case Intrinsic::loongarch_lsx_vslti_d:
4623 case Intrinsic::loongarch_lasx_xvseqi_b:
4624 case Intrinsic::loongarch_lasx_xvseqi_h:
4625 case Intrinsic::loongarch_lasx_xvseqi_w:
4626 case Intrinsic::loongarch_lasx_xvseqi_d:
4627 case Intrinsic::loongarch_lasx_xvslei_b:
4628 case Intrinsic::loongarch_lasx_xvslei_h:
4629 case Intrinsic::loongarch_lasx_xvslei_w:
4630 case Intrinsic::loongarch_lasx_xvslei_d:
4631 case Intrinsic::loongarch_lasx_xvslti_b:
4632 case Intrinsic::loongarch_lasx_xvslti_h:
4633 case Intrinsic::loongarch_lasx_xvslti_w:
4634 case Intrinsic::loongarch_lasx_xvslti_d:
4635 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4636 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4637 case Intrinsic::loongarch_lsx_vsrani_h_w:
4638 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4639 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4640 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4641 case Intrinsic::loongarch_lsx_vssrani_h_w:
4642 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4643 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4644 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4645 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4646 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4647 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4648 case Intrinsic::loongarch_lsx_vfrstpi_b:
4649 case Intrinsic::loongarch_lsx_vfrstpi_h:
4650 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4651 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4652 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4653 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4654 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4655 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4656 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4657 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4658 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4659 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4660 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4661 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4662 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4663 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4664 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4665 case Intrinsic::loongarch_lsx_vsat_d:
4666 case Intrinsic::loongarch_lsx_vsat_du:
4667 case Intrinsic::loongarch_lsx_vrotri_d:
4668 case Intrinsic::loongarch_lsx_vsrlri_d:
4669 case Intrinsic::loongarch_lsx_vsrari_d:
4670 case Intrinsic::loongarch_lasx_xvsat_d:
4671 case Intrinsic::loongarch_lasx_xvsat_du:
4672 case Intrinsic::loongarch_lasx_xvrotri_d:
4673 case Intrinsic::loongarch_lasx_xvsrlri_d:
4674 case Intrinsic::loongarch_lasx_xvsrari_d:
4675 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4676 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4677 case Intrinsic::loongarch_lsx_vsrani_w_d:
4678 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4679 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4680 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4681 case Intrinsic::loongarch_lsx_vssrani_w_d:
4682 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4683 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4684 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4685 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4686 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4687 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4688 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4689 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4690 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4691 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4692 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4693 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4694 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4695 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4696 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4697 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4698 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4699 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4700 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4701 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4702 case Intrinsic::loongarch_lsx_vsrani_d_q:
4703 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4704 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4705 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4706 case Intrinsic::loongarch_lsx_vssrani_d_q:
4707 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4708 case Intrinsic::loongarch_lsx_vssrani_du_q:
4709 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4710 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4711 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4712 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4713 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4714 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4715 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4716 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4717 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4718 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4719 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4720 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4721 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4722 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4723 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4724 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4725 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4726 case Intrinsic::loongarch_lsx_vnori_b:
4727 case Intrinsic::loongarch_lsx_vshuf4i_b:
4728 case Intrinsic::loongarch_lsx_vshuf4i_h:
4729 case Intrinsic::loongarch_lsx_vshuf4i_w:
4730 case Intrinsic::loongarch_lasx_xvnori_b:
4731 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4732 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4733 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4734 case Intrinsic::loongarch_lasx_xvpermi_d:
4735 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4736 case Intrinsic::loongarch_lsx_vshuf4i_d:
4737 case Intrinsic::loongarch_lsx_vpermi_w:
4738 case Intrinsic::loongarch_lsx_vbitseli_b:
4739 case Intrinsic::loongarch_lsx_vextrins_b:
4740 case Intrinsic::loongarch_lsx_vextrins_h:
4741 case Intrinsic::loongarch_lsx_vextrins_w:
4742 case Intrinsic::loongarch_lsx_vextrins_d:
4743 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4744 case Intrinsic::loongarch_lasx_xvpermi_w:
4745 case Intrinsic::loongarch_lasx_xvpermi_q:
4746 case Intrinsic::loongarch_lasx_xvbitseli_b:
4747 case Intrinsic::loongarch_lasx_xvextrins_b:
4748 case Intrinsic::loongarch_lasx_xvextrins_h:
4749 case Intrinsic::loongarch_lasx_xvextrins_w:
4750 case Intrinsic::loongarch_lasx_xvextrins_d:
4751 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4752 case Intrinsic::loongarch_lsx_vrepli_b:
4753 case Intrinsic::loongarch_lsx_vrepli_h:
4754 case Intrinsic::loongarch_lsx_vrepli_w:
4755 case Intrinsic::loongarch_lsx_vrepli_d:
4756 case Intrinsic::loongarch_lasx_xvrepli_b:
4757 case Intrinsic::loongarch_lasx_xvrepli_h:
4758 case Intrinsic::loongarch_lasx_xvrepli_w:
4759 case Intrinsic::loongarch_lasx_xvrepli_d:
4760 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4761 case Intrinsic::loongarch_lsx_vldi:
4762 case Intrinsic::loongarch_lasx_xvldi:
4763 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4764 }
4765}
4766
4767// Helper for intrinsics with a chain: emits the error message
4768// "<operation name>: <ErrorMsg>." and returns the merged values of an UNDEF
// (of Op's value type) and operand 0 of Op, i.e. the chain.
// NOTE(review): the first signature line (embedded listing number 4769) is
// absent from this extract -- confirm the function name and leading parameter
// against the upstream file.
4770 StringRef ErrorMsg,
4771 SelectionDAG &DAG) {
4772 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4773 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4774 SDLoc(Op));
4775}
4776
4777SDValue
4778LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4779 SelectionDAG &DAG) const {
4780 SDLoc DL(Op);
4781 MVT GRLenVT = Subtarget.getGRLenVT();
4782 EVT VT = Op.getValueType();
4783 SDValue Chain = Op.getOperand(0);
4784 const StringRef ErrorMsgOOR = "argument out of range";
4785 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4786 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4787
4788 switch (Op.getConstantOperandVal(1)) {
4789 default:
4790 return Op;
4791 case Intrinsic::loongarch_crc_w_b_w:
4792 case Intrinsic::loongarch_crc_w_h_w:
4793 case Intrinsic::loongarch_crc_w_w_w:
4794 case Intrinsic::loongarch_crc_w_d_w:
4795 case Intrinsic::loongarch_crcc_w_b_w:
4796 case Intrinsic::loongarch_crcc_w_h_w:
4797 case Intrinsic::loongarch_crcc_w_w_w:
4798 case Intrinsic::loongarch_crcc_w_d_w:
4799 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4800 case Intrinsic::loongarch_csrrd_w:
4801 case Intrinsic::loongarch_csrrd_d: {
4802 unsigned Imm = Op.getConstantOperandVal(2);
4803 return !isUInt<14>(Imm)
4804 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4805 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4806 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4807 }
4808 case Intrinsic::loongarch_csrwr_w:
4809 case Intrinsic::loongarch_csrwr_d: {
4810 unsigned Imm = Op.getConstantOperandVal(3);
4811 return !isUInt<14>(Imm)
4812 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4813 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4814 {Chain, Op.getOperand(2),
4815 DAG.getConstant(Imm, DL, GRLenVT)});
4816 }
4817 case Intrinsic::loongarch_csrxchg_w:
4818 case Intrinsic::loongarch_csrxchg_d: {
4819 unsigned Imm = Op.getConstantOperandVal(4);
4820 return !isUInt<14>(Imm)
4821 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4822 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4823 {Chain, Op.getOperand(2), Op.getOperand(3),
4824 DAG.getConstant(Imm, DL, GRLenVT)});
4825 }
4826 case Intrinsic::loongarch_iocsrrd_d: {
4827 return DAG.getNode(
4828 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4829 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4830 }
4831#define IOCSRRD_CASE(NAME, NODE) \
4832 case Intrinsic::loongarch_##NAME: { \
4833 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4834 {Chain, Op.getOperand(2)}); \
4835 }
4836 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4837 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4838 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4839#undef IOCSRRD_CASE
4840 case Intrinsic::loongarch_cpucfg: {
4841 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4842 {Chain, Op.getOperand(2)});
4843 }
4844 case Intrinsic::loongarch_lddir_d: {
4845 unsigned Imm = Op.getConstantOperandVal(3);
4846 return !isUInt<8>(Imm)
4847 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4848 : Op;
4849 }
4850 case Intrinsic::loongarch_movfcsr2gr: {
4851 if (!Subtarget.hasBasicF())
4852 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4853 unsigned Imm = Op.getConstantOperandVal(2);
4854 return !isUInt<2>(Imm)
4855 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4856 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4857 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4858 }
4859 case Intrinsic::loongarch_lsx_vld:
4860 case Intrinsic::loongarch_lsx_vldrepl_b:
4861 case Intrinsic::loongarch_lasx_xvld:
4862 case Intrinsic::loongarch_lasx_xvldrepl_b:
4863 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4864 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4865 : SDValue();
4866 case Intrinsic::loongarch_lsx_vldrepl_h:
4867 case Intrinsic::loongarch_lasx_xvldrepl_h:
4868 return !isShiftedInt<11, 1>(
4869 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4871 Op, "argument out of range or not a multiple of 2", DAG)
4872 : SDValue();
4873 case Intrinsic::loongarch_lsx_vldrepl_w:
4874 case Intrinsic::loongarch_lasx_xvldrepl_w:
4875 return !isShiftedInt<10, 2>(
4876 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4878 Op, "argument out of range or not a multiple of 4", DAG)
4879 : SDValue();
4880 case Intrinsic::loongarch_lsx_vldrepl_d:
4881 case Intrinsic::loongarch_lasx_xvldrepl_d:
4882 return !isShiftedInt<9, 3>(
4883 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4885 Op, "argument out of range or not a multiple of 8", DAG)
4886 : SDValue();
4887 }
4888}
4889
4890// Helper function that emits error message for intrinsics with void return
4891// value and return the chain.
4893 SelectionDAG &DAG) {
4894
4895 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4896 return Op.getOperand(0);
4897}
4898
4899SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4900 SelectionDAG &DAG) const {
4901 SDLoc DL(Op);
4902 MVT GRLenVT = Subtarget.getGRLenVT();
4903 SDValue Chain = Op.getOperand(0);
4904 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4905 SDValue Op2 = Op.getOperand(2);
4906 const StringRef ErrorMsgOOR = "argument out of range";
4907 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4908 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4909 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4910
4911 switch (IntrinsicEnum) {
4912 default:
4913 // TODO: Add more Intrinsics.
4914 return SDValue();
4915 case Intrinsic::loongarch_cacop_d:
4916 case Intrinsic::loongarch_cacop_w: {
4917 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4918 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4919 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4920 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4921 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4922 unsigned Imm1 = Op2->getAsZExtVal();
4923 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4924 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4925 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4926 return Op;
4927 }
4928 case Intrinsic::loongarch_dbar: {
4929 unsigned Imm = Op2->getAsZExtVal();
4930 return !isUInt<15>(Imm)
4931 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4932 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4933 DAG.getConstant(Imm, DL, GRLenVT));
4934 }
4935 case Intrinsic::loongarch_ibar: {
4936 unsigned Imm = Op2->getAsZExtVal();
4937 return !isUInt<15>(Imm)
4938 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4939 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4940 DAG.getConstant(Imm, DL, GRLenVT));
4941 }
4942 case Intrinsic::loongarch_break: {
4943 unsigned Imm = Op2->getAsZExtVal();
4944 return !isUInt<15>(Imm)
4945 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4946 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4947 DAG.getConstant(Imm, DL, GRLenVT));
4948 }
4949 case Intrinsic::loongarch_movgr2fcsr: {
4950 if (!Subtarget.hasBasicF())
4951 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4952 unsigned Imm = Op2->getAsZExtVal();
4953 return !isUInt<2>(Imm)
4954 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4955 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4956 DAG.getConstant(Imm, DL, GRLenVT),
4957 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4958 Op.getOperand(3)));
4959 }
4960 case Intrinsic::loongarch_syscall: {
4961 unsigned Imm = Op2->getAsZExtVal();
4962 return !isUInt<15>(Imm)
4963 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4964 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4965 DAG.getConstant(Imm, DL, GRLenVT));
4966 }
4967#define IOCSRWR_CASE(NAME, NODE) \
4968 case Intrinsic::loongarch_##NAME: { \
4969 SDValue Op3 = Op.getOperand(3); \
4970 return Subtarget.is64Bit() \
4971 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4972 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4973 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4974 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4975 Op3); \
4976 }
4977 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4978 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4979 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4980#undef IOCSRWR_CASE
4981 case Intrinsic::loongarch_iocsrwr_d: {
4982 return !Subtarget.is64Bit()
4983 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4984 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4985 Op2,
4986 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4987 Op.getOperand(3)));
4988 }
4989#define ASRT_LE_GT_CASE(NAME) \
4990 case Intrinsic::loongarch_##NAME: { \
4991 return !Subtarget.is64Bit() \
4992 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4993 : Op; \
4994 }
4995 ASRT_LE_GT_CASE(asrtle_d)
4996 ASRT_LE_GT_CASE(asrtgt_d)
4997#undef ASRT_LE_GT_CASE
4998 case Intrinsic::loongarch_ldpte_d: {
4999 unsigned Imm = Op.getConstantOperandVal(3);
5000 return !Subtarget.is64Bit()
5001 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5002 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5003 : Op;
5004 }
5005 case Intrinsic::loongarch_lsx_vst:
5006 case Intrinsic::loongarch_lasx_xvst:
5007 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
5008 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5009 : SDValue();
5010 case Intrinsic::loongarch_lasx_xvstelm_b:
5011 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5012 !isUInt<5>(Op.getConstantOperandVal(5)))
5013 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5014 : SDValue();
5015 case Intrinsic::loongarch_lsx_vstelm_b:
5016 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5017 !isUInt<4>(Op.getConstantOperandVal(5)))
5018 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5019 : SDValue();
5020 case Intrinsic::loongarch_lasx_xvstelm_h:
5021 return (!isShiftedInt<8, 1>(
5022 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5023 !isUInt<4>(Op.getConstantOperandVal(5)))
5025 Op, "argument out of range or not a multiple of 2", DAG)
5026 : SDValue();
5027 case Intrinsic::loongarch_lsx_vstelm_h:
5028 return (!isShiftedInt<8, 1>(
5029 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5030 !isUInt<3>(Op.getConstantOperandVal(5)))
5032 Op, "argument out of range or not a multiple of 2", DAG)
5033 : SDValue();
5034 case Intrinsic::loongarch_lasx_xvstelm_w:
5035 return (!isShiftedInt<8, 2>(
5036 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5037 !isUInt<3>(Op.getConstantOperandVal(5)))
5039 Op, "argument out of range or not a multiple of 4", DAG)
5040 : SDValue();
5041 case Intrinsic::loongarch_lsx_vstelm_w:
5042 return (!isShiftedInt<8, 2>(
5043 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5044 !isUInt<2>(Op.getConstantOperandVal(5)))
5046 Op, "argument out of range or not a multiple of 4", DAG)
5047 : SDValue();
5048 case Intrinsic::loongarch_lasx_xvstelm_d:
5049 return (!isShiftedInt<8, 3>(
5050 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5051 !isUInt<2>(Op.getConstantOperandVal(5)))
5053 Op, "argument out of range or not a multiple of 8", DAG)
5054 : SDValue();
5055 case Intrinsic::loongarch_lsx_vstelm_d:
5056 return (!isShiftedInt<8, 3>(
5057 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5058 !isUInt<1>(Op.getConstantOperandVal(5)))
5060 Op, "argument out of range or not a multiple of 8", DAG)
5061 : SDValue();
5062 }
5063}
5064
5065SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5066 SelectionDAG &DAG) const {
5067 SDLoc DL(Op);
5068 SDValue Lo = Op.getOperand(0);
5069 SDValue Hi = Op.getOperand(1);
5070 SDValue Shamt = Op.getOperand(2);
5071 EVT VT = Lo.getValueType();
5072
5073 // if Shamt-GRLen < 0: // Shamt < GRLen
5074 // Lo = Lo << Shamt
5075 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5076 // else:
5077 // Lo = 0
5078 // Hi = Lo << (Shamt-GRLen)
5079
5080 SDValue Zero = DAG.getConstant(0, DL, VT);
5081 SDValue One = DAG.getConstant(1, DL, VT);
5082 SDValue MinusGRLen =
5083 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5084 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5085 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5086 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5087
5088 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5089 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5090 SDValue ShiftRightLo =
5091 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5092 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5093 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5094 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5095
5096 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5097
5098 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5099 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5100
5101 SDValue Parts[2] = {Lo, Hi};
5102 return DAG.getMergeValues(Parts, DL);
5103}
5104
5105SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5106 SelectionDAG &DAG,
5107 bool IsSRA) const {
5108 SDLoc DL(Op);
5109 SDValue Lo = Op.getOperand(0);
5110 SDValue Hi = Op.getOperand(1);
5111 SDValue Shamt = Op.getOperand(2);
5112 EVT VT = Lo.getValueType();
5113
5114 // SRA expansion:
5115 // if Shamt-GRLen < 0: // Shamt < GRLen
5116 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5117 // Hi = Hi >>s Shamt
5118 // else:
5119 // Lo = Hi >>s (Shamt-GRLen);
5120 // Hi = Hi >>s (GRLen-1)
5121 //
5122 // SRL expansion:
5123 // if Shamt-GRLen < 0: // Shamt < GRLen
5124 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5125 // Hi = Hi >>u Shamt
5126 // else:
5127 // Lo = Hi >>u (Shamt-GRLen);
5128 // Hi = 0;
5129
5130 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5131
5132 SDValue Zero = DAG.getConstant(0, DL, VT);
5133 SDValue One = DAG.getConstant(1, DL, VT);
5134 SDValue MinusGRLen =
5135 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5136 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5137 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5138 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5139
5140 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5141 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5142 SDValue ShiftLeftHi =
5143 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5144 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5145 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5146 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5147 SDValue HiFalse =
5148 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5149
5150 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5151
5152 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5153 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5154
5155 SDValue Parts[2] = {Lo, Hi};
5156 return DAG.getMergeValues(Parts, DL);
5157}
5158
5159// Returns the opcode of the target-specific SDNode that implements the 32-bit
5160// form of the given Opcode.
5161static unsigned getLoongArchWOpcode(unsigned Opcode) {
5162 switch (Opcode) {
5163 default:
5164 llvm_unreachable("Unexpected opcode");
5165 case ISD::SDIV:
5166 return LoongArchISD::DIV_W;
5167 case ISD::UDIV:
5168 return LoongArchISD::DIV_WU;
5169 case ISD::SREM:
5170 return LoongArchISD::MOD_W;
5171 case ISD::UREM:
5172 return LoongArchISD::MOD_WU;
5173 case ISD::SHL:
5174 return LoongArchISD::SLL_W;
5175 case ISD::SRA:
5176 return LoongArchISD::SRA_W;
5177 case ISD::SRL:
5178 return LoongArchISD::SRL_W;
5179 case ISD::ROTL:
5180 case ISD::ROTR:
5181 return LoongArchISD::ROTR_W;
5182 case ISD::CTTZ:
5183 return LoongArchISD::CTZ_W;
5184 case ISD::CTLZ:
5185 return LoongArchISD::CLZ_W;
5186 }
5187}
5188
5189// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5190// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5191// otherwise be promoted to i64, making it difficult to select the
5192// SLL_W/.../*W later one because the fact the operation was originally of
5193// type i8/i16/i32 is lost.
5195 unsigned ExtOpc = ISD::ANY_EXTEND) {
5196 SDLoc DL(N);
5197 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5198 SDValue NewOp0, NewRes;
5199
5200 switch (NumOp) {
5201 default:
5202 llvm_unreachable("Unexpected NumOp");
5203 case 1: {
5204 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5205 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5206 break;
5207 }
5208 case 2: {
5209 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5210 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5211 if (N->getOpcode() == ISD::ROTL) {
5212 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5213 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5214 }
5215 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5216 break;
5217 }
5218 // TODO:Handle more NumOp.
5219 }
5220
5221 // ReplaceNodeResults requires we maintain the same type for the return
5222 // value.
5223 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5224}
5225
5226// Converts the given 32-bit operation to a i64 operation with signed extension
5227// semantic to reduce the signed extension instructions.
5229 SDLoc DL(N);
5230 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5231 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5232 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5233 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5234 DAG.getValueType(MVT::i32));
5235 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5236}
5237
5238// Helper function that emits error message for intrinsics with/without chain
5239// and return a UNDEF or and the chain as the results.
5242 StringRef ErrorMsg, bool WithChain = true) {
5243 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5244 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5245 if (!WithChain)
5246 return;
5247 Results.push_back(N->getOperand(0));
5248}
5249
5250template <unsigned N>
5251static void
5253 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5254 unsigned ResOp) {
5255 const StringRef ErrorMsgOOR = "argument out of range";
5256 unsigned Imm = Node->getConstantOperandVal(2);
5257 if (!isUInt<N>(Imm)) {
5259 /*WithChain=*/false);
5260 return;
5261 }
5262 SDLoc DL(Node);
5263 SDValue Vec = Node->getOperand(1);
5264
5265 SDValue PickElt =
5266 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5267 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5269 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5270 PickElt.getValue(0)));
5271}
5272
5275 SelectionDAG &DAG,
5276 const LoongArchSubtarget &Subtarget,
5277 unsigned ResOp) {
5278 SDLoc DL(N);
5279 SDValue Vec = N->getOperand(1);
5280
5281 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5282 Results.push_back(
5283 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5284}
5285
5286static void
5288 SelectionDAG &DAG,
5289 const LoongArchSubtarget &Subtarget) {
5290 switch (N->getConstantOperandVal(0)) {
5291 default:
5292 llvm_unreachable("Unexpected Intrinsic.");
5293 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5294 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5295 LoongArchISD::VPICK_SEXT_ELT);
5296 break;
5297 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5298 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5299 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5300 LoongArchISD::VPICK_SEXT_ELT);
5301 break;
5302 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5303 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5304 LoongArchISD::VPICK_SEXT_ELT);
5305 break;
5306 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5307 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5308 LoongArchISD::VPICK_ZEXT_ELT);
5309 break;
5310 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5311 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5312 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5313 LoongArchISD::VPICK_ZEXT_ELT);
5314 break;
5315 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5316 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5317 LoongArchISD::VPICK_ZEXT_ELT);
5318 break;
5319 case Intrinsic::loongarch_lsx_bz_b:
5320 case Intrinsic::loongarch_lsx_bz_h:
5321 case Intrinsic::loongarch_lsx_bz_w:
5322 case Intrinsic::loongarch_lsx_bz_d:
5323 case Intrinsic::loongarch_lasx_xbz_b:
5324 case Intrinsic::loongarch_lasx_xbz_h:
5325 case Intrinsic::loongarch_lasx_xbz_w:
5326 case Intrinsic::loongarch_lasx_xbz_d:
5327 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5328 LoongArchISD::VALL_ZERO);
5329 break;
5330 case Intrinsic::loongarch_lsx_bz_v:
5331 case Intrinsic::loongarch_lasx_xbz_v:
5332 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5333 LoongArchISD::VANY_ZERO);
5334 break;
5335 case Intrinsic::loongarch_lsx_bnz_b:
5336 case Intrinsic::loongarch_lsx_bnz_h:
5337 case Intrinsic::loongarch_lsx_bnz_w:
5338 case Intrinsic::loongarch_lsx_bnz_d:
5339 case Intrinsic::loongarch_lasx_xbnz_b:
5340 case Intrinsic::loongarch_lasx_xbnz_h:
5341 case Intrinsic::loongarch_lasx_xbnz_w:
5342 case Intrinsic::loongarch_lasx_xbnz_d:
5343 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5344 LoongArchISD::VALL_NONZERO);
5345 break;
5346 case Intrinsic::loongarch_lsx_bnz_v:
5347 case Intrinsic::loongarch_lasx_xbnz_v:
5348 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5349 LoongArchISD::VANY_NONZERO);
5350 break;
5351 }
5352}
5353
5356 SelectionDAG &DAG) {
5357 assert(N->getValueType(0) == MVT::i128 &&
5358 "AtomicCmpSwap on types less than 128 should be legal");
5359 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5360
5361 unsigned Opcode;
5362 switch (MemOp->getMergedOrdering()) {
5366 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5367 break;
5370 Opcode = LoongArch::PseudoCmpXchg128;
5371 break;
5372 default:
5373 llvm_unreachable("Unexpected ordering!");
5374 }
5375
5376 SDLoc DL(N);
5377 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5378 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5379 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5380 NewVal.first, NewVal.second, N->getOperand(0)};
5381
5382 SDNode *CmpSwap = DAG.getMachineNode(
5383 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5384 Ops);
5385 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5386 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5387 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5388 Results.push_back(SDValue(CmpSwap, 3));
5389}
5390
5393 SDLoc DL(N);
5394 EVT VT = N->getValueType(0);
5395 switch (N->getOpcode()) {
5396 default:
5397 llvm_unreachable("Don't know how to legalize this operation");
5398 case ISD::ADD:
5399 case ISD::SUB:
5400 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5401 "Unexpected custom legalisation");
5402 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5403 break;
5404 case ISD::SDIV:
5405 case ISD::UDIV:
5406 case ISD::SREM:
5407 case ISD::UREM:
5408 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5409 "Unexpected custom legalisation");
5410 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5411 Subtarget.hasDiv32() && VT == MVT::i32
5413 : ISD::SIGN_EXTEND));
5414 break;
5415 case ISD::SHL:
5416 case ISD::SRA:
5417 case ISD::SRL:
5418 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5419 "Unexpected custom legalisation");
5420 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5421 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5422 break;
5423 }
5424 break;
5425 case ISD::ROTL:
5426 case ISD::ROTR:
5427 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5428 "Unexpected custom legalisation");
5429 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5430 break;
5431 case ISD::LOAD: {
5432 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5433 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5434 // cast since type legalization will try to use an i64 load.
5435 MVT VT = N->getSimpleValueType(0);
5436 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5437 "Unexpected custom legalisation");
5439 "Unexpected type action!");
5440 if (!ISD::isNON_EXTLoad(N))
5441 return;
5442 auto *Ld = cast<LoadSDNode>(N);
5443 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5444 Ld->getPointerInfo(), Ld->getBaseAlign(),
5445 Ld->getMemOperand()->getFlags());
5446 SDValue Chain = Res.getValue(1);
5447 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5448 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5449 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5450 Res = DAG.getBitcast(WideVT, Res);
5451 Results.push_back(Res);
5452 Results.push_back(Chain);
5453 break;
5454 }
5455 case ISD::FP_TO_SINT: {
5456 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5457 "Unexpected custom legalisation");
5458 SDValue Src = N->getOperand(0);
5459 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5460 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5462 if (!isTypeLegal(Src.getValueType()))
5463 return;
5464 if (Src.getValueType() == MVT::f16)
5465 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5466 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5467 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5468 return;
5469 }
5470 // If the FP type needs to be softened, emit a library call using the 'si'
5471 // version. If we left it to default legalization we'd end up with 'di'.
5472 RTLIB::Libcall LC;
5473 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5474 MakeLibCallOptions CallOptions;
5475 EVT OpVT = Src.getValueType();
5476 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5477 SDValue Chain = SDValue();
5478 SDValue Result;
5479 std::tie(Result, Chain) =
5480 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5481 Results.push_back(Result);
5482 break;
5483 }
5484 case ISD::BITCAST: {
5485 SDValue Src = N->getOperand(0);
5486 EVT SrcVT = Src.getValueType();
5487 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5488 Subtarget.hasBasicF()) {
5489 SDValue Dst =
5490 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5491 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5492 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5493 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5494 DAG.getVTList(MVT::i32, MVT::i32), Src);
5495 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5496 NewReg.getValue(0), NewReg.getValue(1));
5497 Results.push_back(RetReg);
5498 }
5499 break;
5500 }
5501 case ISD::FP_TO_UINT: {
5502 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5503 "Unexpected custom legalisation");
5504 auto &TLI = DAG.getTargetLoweringInfo();
5505 SDValue Tmp1, Tmp2;
5506 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5507 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5508 break;
5509 }
5510 case ISD::FP_ROUND: {
5511 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5512 "Unexpected custom legalisation");
5513 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5514 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5515 // a target-specific LoongArchISD::VFCVT to optimize it.
5516 SDValue Op0 = N->getOperand(0);
5517 EVT OpVT = Op0.getValueType();
5518 if (OpVT == MVT::v2f64) {
5519 SDValue Undef = DAG.getUNDEF(OpVT);
5520 SDValue Dst =
5521 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5522 Results.push_back(Dst);
5523 }
5524 break;
5525 }
5526 case ISD::BSWAP: {
5527 SDValue Src = N->getOperand(0);
5528 assert((VT == MVT::i16 || VT == MVT::i32) &&
5529 "Unexpected custom legalization");
5530 MVT GRLenVT = Subtarget.getGRLenVT();
5531 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5532 SDValue Tmp;
5533 switch (VT.getSizeInBits()) {
5534 default:
5535 llvm_unreachable("Unexpected operand width");
5536 case 16:
5537 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5538 break;
5539 case 32:
5540 // Only LA64 will get to here due to the size mismatch between VT and
5541 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5542 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5543 break;
5544 }
5545 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5546 break;
5547 }
5548 case ISD::BITREVERSE: {
5549 SDValue Src = N->getOperand(0);
5550 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5551 "Unexpected custom legalization");
5552 MVT GRLenVT = Subtarget.getGRLenVT();
5553 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5554 SDValue Tmp;
5555 switch (VT.getSizeInBits()) {
5556 default:
5557 llvm_unreachable("Unexpected operand width");
5558 case 8:
5559 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5560 break;
5561 case 32:
5562 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5563 break;
5564 }
5565 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5566 break;
5567 }
5568 case ISD::CTLZ:
5569 case ISD::CTTZ: {
5570 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5571 "Unexpected custom legalisation");
5572 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5573 break;
5574 }
5576 SDValue Chain = N->getOperand(0);
5577 SDValue Op2 = N->getOperand(2);
5578 MVT GRLenVT = Subtarget.getGRLenVT();
5579 const StringRef ErrorMsgOOR = "argument out of range";
5580 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5581 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5582
5583 switch (N->getConstantOperandVal(1)) {
5584 default:
5585 llvm_unreachable("Unexpected Intrinsic.");
5586 case Intrinsic::loongarch_movfcsr2gr: {
5587 if (!Subtarget.hasBasicF()) {
5588 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5589 return;
5590 }
5591 unsigned Imm = Op2->getAsZExtVal();
5592 if (!isUInt<2>(Imm)) {
5593 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5594 return;
5595 }
5596 SDValue MOVFCSR2GRResults = DAG.getNode(
5597 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5598 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5599 Results.push_back(
5600 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5601 Results.push_back(MOVFCSR2GRResults.getValue(1));
5602 break;
5603 }
5604#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5605 case Intrinsic::loongarch_##NAME: { \
5606 SDValue NODE = DAG.getNode( \
5607 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5608 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5609 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5610 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5611 Results.push_back(NODE.getValue(1)); \
5612 break; \
5613 }
5614 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5615 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5616 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5617 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5618 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5619 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5620#undef CRC_CASE_EXT_BINARYOP
5621
5622#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5623 case Intrinsic::loongarch_##NAME: { \
5624 SDValue NODE = DAG.getNode( \
5625 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5626 {Chain, Op2, \
5627 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5628 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5629 Results.push_back(NODE.getValue(1)); \
5630 break; \
5631 }
5632 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5633 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5634#undef CRC_CASE_EXT_UNARYOP
5635#define CSR_CASE(ID) \
5636 case Intrinsic::loongarch_##ID: { \
5637 if (!Subtarget.is64Bit()) \
5638 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5639 break; \
5640 }
5641 CSR_CASE(csrrd_d);
5642 CSR_CASE(csrwr_d);
5643 CSR_CASE(csrxchg_d);
5644 CSR_CASE(iocsrrd_d);
5645#undef CSR_CASE
5646 case Intrinsic::loongarch_csrrd_w: {
5647 unsigned Imm = Op2->getAsZExtVal();
5648 if (!isUInt<14>(Imm)) {
5649 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5650 return;
5651 }
5652 SDValue CSRRDResults =
5653 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5654 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5655 Results.push_back(
5656 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5657 Results.push_back(CSRRDResults.getValue(1));
5658 break;
5659 }
5660 case Intrinsic::loongarch_csrwr_w: {
5661 unsigned Imm = N->getConstantOperandVal(3);
5662 if (!isUInt<14>(Imm)) {
5663 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5664 return;
5665 }
5666 SDValue CSRWRResults =
5667 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5668 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5669 DAG.getConstant(Imm, DL, GRLenVT)});
5670 Results.push_back(
5671 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5672 Results.push_back(CSRWRResults.getValue(1));
5673 break;
5674 }
5675 case Intrinsic::loongarch_csrxchg_w: {
5676 unsigned Imm = N->getConstantOperandVal(4);
5677 if (!isUInt<14>(Imm)) {
5678 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5679 return;
5680 }
5681 SDValue CSRXCHGResults = DAG.getNode(
5682 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5683 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5684 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5685 DAG.getConstant(Imm, DL, GRLenVT)});
5686 Results.push_back(
5687 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5688 Results.push_back(CSRXCHGResults.getValue(1));
5689 break;
5690 }
5691#define IOCSRRD_CASE(NAME, NODE) \
5692 case Intrinsic::loongarch_##NAME: { \
5693 SDValue IOCSRRDResults = \
5694 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5695 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5696 Results.push_back( \
5697 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5698 Results.push_back(IOCSRRDResults.getValue(1)); \
5699 break; \
5700 }
5701 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5702 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5703 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5704#undef IOCSRRD_CASE
5705 case Intrinsic::loongarch_cpucfg: {
5706 SDValue CPUCFGResults =
5707 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5708 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5709 Results.push_back(
5710 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5711 Results.push_back(CPUCFGResults.getValue(1));
5712 break;
5713 }
5714 case Intrinsic::loongarch_lddir_d: {
5715 if (!Subtarget.is64Bit()) {
5716 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5717 return;
5718 }
5719 break;
5720 }
5721 }
5722 break;
5723 }
5724 case ISD::READ_REGISTER: {
5725 if (Subtarget.is64Bit())
5726 DAG.getContext()->emitError(
5727 "On LA64, only 64-bit registers can be read.");
5728 else
5729 DAG.getContext()->emitError(
5730 "On LA32, only 32-bit registers can be read.");
5731 Results.push_back(DAG.getUNDEF(VT));
5732 Results.push_back(N->getOperand(0));
5733 break;
5734 }
5736 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5737 break;
5738 }
5739 case ISD::LROUND: {
5740 SDValue Op0 = N->getOperand(0);
5741 EVT OpVT = Op0.getValueType();
5742 RTLIB::Libcall LC =
5743 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5744 MakeLibCallOptions CallOptions;
5745 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5746 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5747 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5748 Results.push_back(Result);
5749 break;
5750 }
5751 case ISD::ATOMIC_CMP_SWAP: {
5753 break;
5754 }
5755 case ISD::TRUNCATE: {
5756 MVT VT = N->getSimpleValueType(0);
5757 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5758 return;
5759
5760 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5761 SDValue In = N->getOperand(0);
5762 EVT InVT = In.getValueType();
5763 EVT InEltVT = InVT.getVectorElementType();
5764 EVT EltVT = VT.getVectorElementType();
5765 unsigned MinElts = VT.getVectorNumElements();
5766 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5767 unsigned InBits = InVT.getSizeInBits();
5768
5769 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5770 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5771 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5772 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5773 for (unsigned I = 0; I < MinElts; ++I)
5774 TruncMask[I] = Scale * I;
5775
5776 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5777 MVT SVT = In.getSimpleValueType().getScalarType();
5778 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5779 SDValue WidenIn =
5780 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5781 DAG.getVectorIdxConstant(0, DL));
5782 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5783 "Illegal vector type in truncation");
5784 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5785 Results.push_back(
5786 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5787 return;
5788 }
5789 }
5790
5791 break;
5792 }
5793 case ISD::SIGN_EXTEND: {
5794 // LASX has native VEXT2XV_* for sign extension.
5795 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5796 return;
5797
5798 EVT DstVT = N->getValueType(0);
5799 SDValue Src = N->getOperand(0);
5800 MVT SrcVT = Src.getSimpleValueType();
5801
5802 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5803 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5804 unsigned NumElts = DstVT.getVectorNumElements();
5805
5806 if (SrcVT.getSizeInBits() > 128)
5807 return;
5808
5809 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5810 return;
5811
5812 // Legalize and extend the src to 128-bit first.
5813 if (SrcVT.getSizeInBits() < 128) {
5814 unsigned WidenSrcElts = 128 / SrcEltBits;
5815 MVT WidenSrcVT = MVT::getVectorVT(SrcVT.getScalarType(), WidenSrcElts);
5816 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenSrcVT,
5817 DAG.getUNDEF(WidenSrcVT), Src,
5818 DAG.getVectorIdxConstant(0, DL));
5819 SrcVT = WidenSrcVT;
5820
5821 unsigned FirstStageEltBits = 128 / NumElts;
5822 MVT FirstStageEltVT = MVT::getIntegerVT(FirstStageEltBits);
5823 MVT FirstStageVT = MVT::getVectorVT(FirstStageEltVT, NumElts);
5824 Src = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, FirstStageVT, Src);
5825 SrcVT = FirstStageVT;
5826 SrcEltBits = FirstStageEltBits;
5827 }
5828
5830 Blocks.push_back(Src);
5831
5832 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5833 while (SrcEltBits < DstEltBits) {
5834 unsigned NextEltBits = SrcEltBits * 2;
5835 MVT NextEltVT = MVT::getIntegerVT(NextEltBits);
5836 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5837 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5838 MVT NextBlockVT = MVT::getVectorVT(NextEltVT, NextEltsPerBlock);
5839
5840 SmallVector<SDValue, 8> NextBlocks;
5841 NextBlocks.reserve(Blocks.size() * 2);
5842 for (SDValue Block : Blocks) {
5843 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
5844 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Block, Zero,
5845 DAG.getCondCode(ISD::SETLT));
5846 SDValue LoInterleaved =
5847 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Block);
5848 SDValue HiInterleaved =
5849 DAG.getNode(LoongArchISD::VILVH, DL, SrcVT, Mask, Block);
5850
5851 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, LoInterleaved));
5852 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, HiInterleaved));
5853 }
5854
5855 Blocks = std::move(NextBlocks);
5856 SrcVT = NextBlockVT;
5857 SrcEltBits = NextEltBits;
5858 }
5859
5860 Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Blocks));
5861 break;
5862 }
5863 case ISD::FP_EXTEND:
5864 // FP_EXTEND may reach here due to the Custom action for v2f32 results, but
5865 // no target-specific lowering is required. Leave it unchanged and rely on
5866 // the default type legalization.
5867 break;
5868 }
5869}
5870
5871/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
/// Only 128-bit and 256-bit vector results are considered; for any other
/// type, or when neither AND operand is recognised by isNOT, an empty SDValue
/// is returned. The inverted operand may be on either side of the AND.
/// NOTE(review): the first line of the signature is not visible in this
/// listing.
5873 SelectionDAG &DAG) {
5874 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5875
     // VANDN is only formed for 128-bit and 256-bit vector results.
5876 MVT VT = N->getSimpleValueType(0);
5877 if (!VT.is128BitVector() && !VT.is256BitVector())
5878 return SDValue();
5879
5880 SDValue X, Y;
5881 SDValue N0 = N->getOperand(0);
5882 SDValue N1 = N->getOperand(1);
5883
     // Whichever operand isNOT recognises supplies X (the value isNOT returns);
     // the remaining operand becomes Y.
5884 if (SDValue Not = isNOT(N0, DAG)) {
5885 X = Not;
5886 Y = N1;
5887 } else if (SDValue Not = isNOT(N1, DAG)) {
5888 X = Not;
5889 Y = N0;
5890 } else
5891 return SDValue();
5892
     // Bring both operands to the result type before building the node.
     // NOTE(review): presumably isNOT can return a value of a different vector
     // type (e.g. by looking through bitcasts) -- confirm.
5893 X = DAG.getBitcast(VT, X);
5894 Y = DAG.getBitcast(VT, Y);
5895 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5896}
5897
// Returns the result of BuildVectorSDNode-style isConstantSplat on `Node`,
// writing the splatted constant into SplatValue. MinSizeInBits is forwarded as
// the minimum splat element width and the query is made with
// IsBigEndian=false. Returns false when `Node` is null.
// NOTE(review): the statement that defines `Node` is not visible in this
// listing; presumably it is N viewed as a BuildVectorSDNode -- confirm.
5898static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5899 unsigned MinSizeInBits) {
5902
5903 if (!Node)
5904 return false;
5905
     // Out-parameters required by isConstantSplat; their values are not used
     // by this helper.
5906 APInt SplatUndef;
5907 unsigned SplatBitSize;
5908 bool HasAnyUndefs;
5909
5910 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5911 HasAnyUndefs, MinSizeInBits,
5912 /*IsBigEndian=*/false);
5913}
5914
5917 const LoongArchSubtarget &Subtarget) {
     // Folds a rounded-right-shift idiom on legal integer vector types,
     //   add (and (srl X, shift-1) or X, 1), (srl/sra X, shift)
     // into a single LoongArchISD::VSRLR (for srl) or LoongArchISD::VSRAR
     // (for sra) node. Returns an empty SDValue when the pattern does not match.
     // NOTE(review): the start of the signature (including the function name)
     // is not visible in this listing.

     // Do nothing until operations have been legalized.
5918 if (DCI.isBeforeLegalizeOps())
5919 return SDValue();
5920
5921 EVT VT = N->getValueType(0);
5922 if (!VT.isVector())
5923 return SDValue();
5924
5925 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
5926 return SDValue();
5927
5928 EVT EltVT = VT.getVectorElementType();
5929 if (!EltVT.isInteger())
5930 return SDValue();
5931
5932 // match:
5933 //
5934 // add
5935 // (and
5936 // (srl X, shift-1) / X
5937 // 1)
5938 // (srl/sra X, shift)
5939
5940 SDValue Add0 = N->getOperand(0);
5941 SDValue Add1 = N->getOperand(1);
5942 SDValue And;
5943 SDValue Shr;
5944
     // The AND may be either operand of the add; the other operand is then
     // expected to be the shift.
5945 if (Add0.getOpcode() == ISD::AND) {
5946 And = Add0;
5947 Shr = Add1;
5948 } else if (Add1.getOpcode() == ISD::AND) {
5949 And = Add1;
5950 Shr = Add0;
5951 } else {
5952 return SDValue();
5953 }
5954
5955 // match:
5956 //
5957 // srl/sra X, shift
5958
5959 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
5960 return SDValue();
5961
5962 SDValue X = Shr.getOperand(0);
5963 SDValue Shift = Shr.getOperand(1);
5964 APInt ShiftVal;
5965
     // The shift amount must be a constant splat at the element width.
5966 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
5967 return SDValue();
5968
     // A shift of zero has no (shift - 1) bit position to match against.
5969 if (ShiftVal == 0)
5970 return SDValue();
5971
5972 // match:
5973 //
5974 // and
5975 // (srl X, shift-1) / X
5976 // 1
5977
     // Only the second operand of the AND is checked for the splat-of-1 mask.
5978 SDValue One = And.getOperand(1);
5979 APInt SplatVal;
5980
5981 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
5982 return SDValue();
5983
5984 if (SplatVal != 1)
5985 return SDValue();
5986
5987 if (And.getOperand(0) == X) {
5988 // match:
5989 //
5990 // shift == 1
5991
5992 if (ShiftVal != 1)
5993 return SDValue();
5994 } else {
5995 // match:
5996 //
5997 // srl X, shift-1
5998
5999 SDValue Srl = And.getOperand(0);
6000
6001 if (Srl.getOpcode() != ISD::SRL)
6002 return SDValue();
6003
6004 if (Srl.getOperand(0) != X)
6005 return SDValue();
6006
6007 // match:
6008 //
6009 // shift-1
6010
6011 SDValue ShiftMinus1 = Srl.getOperand(1);
6012
     // SplatVal is reused here: it now receives the inner shift amount.
6013 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
6014 return SDValue();
6015
6016 if (ShiftVal != (SplatVal + 1))
6017 return SDValue();
6018 }
6019
6020 // We matched a rounded right shift pattern and can lower it
6021 // to a single vector rounded shift instruction.
6022
6023 SDLoc DL(N);
6024 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6025 : LoongArchISD::VSRAR,
6026 DL, VT, X, Shift);
6027}
6028
6031 const LoongArchSubtarget &Subtarget) {
     // Combines an AND node after operation legalization. It first tries the
     // VANDN fold; then, when the 32S feature is available and the second
     // operand is a constant shifted mask, it rewrites the AND as a
     // LoongArchISD::BSTRPICK bit-field extract, followed by an SHL when the
     // mask does not start at bit 0. Returns an empty SDValue otherwise.
     // NOTE(review): the start of the signature (including the function name)
     // is not visible in this listing.
6032 if (DCI.isBeforeLegalizeOps())
6033 return SDValue();
6034
6035 SDValue FirstOperand = N->getOperand(0);
6036 SDValue SecondOperand = N->getOperand(1);
6037 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6038 EVT ValTy = N->getValueType(0);
6039 SDLoc DL(N);
6040 uint64_t lsb, msb;
6041 unsigned SMIdx, SMLen;
6042 ConstantSDNode *CN;
6043 SDValue NewOperand;
6044 MVT GRLenVT = Subtarget.getGRLenVT();
6045
6046 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6047 return R;
6048
6049 // BSTRPICK requires the 32S feature.
6050 if (!Subtarget.has32S())
6051 return SDValue();
6052
6053 // Op's second operand must be a shifted mask.
6054 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
6055 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
6056 return SDValue();
6057
6058 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6059 // Pattern match BSTRPICK.
6060 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6061 // => BSTRPICK $dst, $src, msb, lsb
6062 // where msb = lsb + len - 1
6063
6064 // The second operand of the shift must be an immediate.
     // CN is reassigned here and no longer refers to the AND mask.
6065 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
6066 return SDValue();
6067
6068 lsb = CN->getZExtValue();
6069
6070 // Return if the shifted mask does not start at bit 0 or the sum of its
6071 // length and lsb exceeds the word's size.
6072 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6073 return SDValue();
6074
6075 NewOperand = FirstOperand.getOperand(0);
6076 } else {
6077 // Pattern match BSTRPICK.
6078 // $dst = and $src, (2**len- 1) , if len > 12
6079 // => BSTRPICK $dst, $src, msb, lsb
6080 // where lsb = 0 and msb = len - 1
6081
6082 // If the mask is <= 0xfff, andi can be used instead.
6083 if (CN->getZExtValue() <= 0xfff)
6084 return SDValue();
6085
6086 // Return if the MSB exceeds the width of the value type.
6087 if (SMIdx + SMLen > ValTy.getSizeInBits())
6088 return SDValue();
6089
6090 if (SMIdx > 0) {
6091 // Omit if the constant has more than 2 uses. This is a conservative
6092 // decision. Whether it is a win depends on the HW microarchitecture.
6093 // However it should always be better for 1 and 2 uses.
6094 if (CN->use_size() > 2)
6095 return SDValue();
6096 // Return if the constant can be composed by a single LU12I.W.
6097 if ((CN->getZExtValue() & 0xfff) == 0)
6098 return SDValue();
6099 // Return if the constant can be composed by a single ADDI with
6100 // the zero register.
6101 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6102 return SDValue();
6103 }
6104
6105 lsb = SMIdx;
6106 NewOperand = FirstOperand;
6107 }
6108
6109 msb = lsb + SMLen - 1;
6110 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
6111 DAG.getConstant(msb, DL, GRLenVT),
6112 DAG.getConstant(lsb, DL, GRLenVT));
     // The extract alone is the result when it came from the shift pattern or
     // when the mask starts at bit 0.
6113 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6114 return NR0;
6115 // Try to optimize to
6116 // bstrpick $Rd, $Rs, msb, lsb
6117 // slli $Rd, $Rd, lsb
     // The mask started at a non-zero bit, so the extracted field is shifted
     // left by lsb to put it back at its original position.
6118 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6119 DAG.getConstant(lsb, DL, GRLenVT));
6120}
6121
6124 const LoongArchSubtarget &Subtarget) {
     // Combines srl (and X, shifted_mask), Shamt into a single
     // LoongArchISD::BSTRPICK when the shift amount falls inside the mask's bit
     // range. Returns an empty SDValue when the pattern does not match.
     // NOTE(review): the start of the signature (including the function name)
     // is not visible in this listing.
6125 // BSTRPICK requires the 32S feature.
6126 if (!Subtarget.has32S())
6127 return SDValue();
6128
6129 if (DCI.isBeforeLegalizeOps())
6130 return SDValue();
6131
6132 // $dst = srl (and $src, Mask), Shamt
6133 // =>
6134 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6135 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6136 //
6137
6138 SDValue FirstOperand = N->getOperand(0);
6139 ConstantSDNode *CN;
6140 EVT ValTy = N->getValueType(0);
6141 SDLoc DL(N);
6142 MVT GRLenVT = Subtarget.getGRLenVT();
6143 unsigned MaskIdx, MaskLen;
6144 uint64_t Shamt;
6145
6146 // The first operand must be an AND and the second operand of the AND must be
6147 // a shifted mask.
6148 if (FirstOperand.getOpcode() != ISD::AND ||
6149 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6150 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6151 return SDValue();
6152
6153 // The second operand (shift amount) must be an immediate.
     // CN is reused: from here on it refers to the shift amount, not the mask.
6154 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6155 return SDValue();
6156
6157 Shamt = CN->getZExtValue();
     // The extracted field runs from bit Shamt up to the mask's top bit.
6158 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6159 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6160 FirstOperand->getOperand(0),
6161 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6162 DAG.getConstant(Shamt, DL, GRLenVT));
6163
6164 return SDValue();
6165}
6166
6167// Helper to peek through bitops/trunc/setcc to determine size of source vector.
6168// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
// Returns true when every SETCC/TRUNCATE reached from Src has a first operand
// of exactly Size bits; all-zeros and all-ones BUILD_VECTORs are accepted
// regardless of Size. Any opcode not listed in the switch yields false.
// Depth is the current recursion depth and is incremented on each recursive
// call.
6169static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6170 unsigned Depth) {
6171 // Limit recursion.
     // NOTE(review): the condition guarding the following return is not visible
     // in this listing; presumably it compares Depth against a maximum.
6173 return false;
6174 switch (Src.getOpcode()) {
6175 case ISD::SETCC:
6176 case ISD::TRUNCATE:
     // Leaf: compare the width of the value being compared or truncated.
6177 return Src.getOperand(0).getValueSizeInBits() == Size;
6178 case ISD::FREEZE:
6179 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6180 case ISD::AND:
6181 case ISD::XOR:
6182 case ISD::OR:
     // Both inputs of a bitwise op must trace back to the requested size.
6183 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6184 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6185 case ISD::SELECT:
6186 case ISD::VSELECT:
     // The condition must have i1 scalar type, and both selected values must
     // trace back to the requested size.
6187 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6188 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6189 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6190 case ISD::BUILD_VECTOR:
6191 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6192 ISD::isBuildVectorAllOnes(Src.getNode());
6193 }
6194 return false;
6195}
6196
6197// Helper to push sign extension of vXi1 SETCC result through bitops.
// SETCC, FREEZE, TRUNCATE and BUILD_VECTOR sources are sign-extended to SExtVT
// directly. For AND/XOR/OR and SELECT/VSELECT the extension is pushed into the
// value operands recursively and the operation is rebuilt at SExtVT; a select
// keeps its original condition operand. Any other opcode is a programming
// error and hits llvm_unreachable.
// NOTE(review): the first line of the signature is not visible in this
// listing.
6199 SDValue Src, const SDLoc &DL) {
6200 switch (Src.getOpcode()) {
6201 case ISD::SETCC:
6202 case ISD::FREEZE:
6203 case ISD::TRUNCATE:
6204 case ISD::BUILD_VECTOR:
6205 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6206 case ISD::AND:
6207 case ISD::XOR:
6208 case ISD::OR:
6209 return DAG.getNode(
6210 Src.getOpcode(), DL, SExtVT,
6211 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6212 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6213 case ISD::SELECT:
6214 case ISD::VSELECT:
6215 return DAG.getSelect(
6216 DL, SExtVT, Src.getOperand(0),
6217 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6218 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6219 }
6220 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6221}
6222
// Combines bitcast (setcc X, C, cc) into a single [X]VMSK* mask node when the
// compare is a single-use SETCC of a 128-bit (LSX) or 256-bit (32S + LASX)
// vector against an all-zeros or all-ones build vector, for the condition
// codes handled in the switch below. Returns an empty SDValue when no mask
// opcode is selected.
// NOTE(review): part of the signature and the definition of `T` are not
// visible in this listing.
6223static SDValue
6226 const LoongArchSubtarget &Subtarget) {
6227 SDLoc DL(N);
6228 EVT VT = N->getValueType(0);
6229 SDValue Src = N->getOperand(0);
6230 EVT SrcVT = Src.getValueType();
6231
6232 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6233 return SDValue();
6234
6235 bool UseLASX;
     // ISD::DELETED_NODE serves as the "no opcode selected" sentinel.
6236 unsigned Opc = ISD::DELETED_NODE;
6237 EVT CmpVT = Src.getOperand(0).getValueType();
6238 EVT EltVT = CmpVT.getVectorElementType();
6239
     // Choose the LSX or LASX form from the width of the compared vectors.
6240 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6241 UseLASX = false;
6242 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6243 CmpVT.getSizeInBits() == 256)
6244 UseLASX = true;
6245 else
6246 return SDValue();
6247
6248 SDValue SrcN1 = Src.getOperand(1);
6249 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6250 default:
6251 break;
6252 case ISD::SETEQ:
6253 // x == 0 => not (vmsknez.b x)
6254 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6255 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6256 break;
6257 case ISD::SETGT:
6258 // x > -1 => vmskgez.b x
6259 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6260 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6261 break;
6262 case ISD::SETGE:
6263 // x >= 0 => vmskgez.b x
6264 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6265 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6266 break;
6267 case ISD::SETLT:
6268 // x < 0 => vmskltz.{b,h,w,d} x
6269 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6270 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6271 EltVT == MVT::i64))
6272 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6273 break;
6274 case ISD::SETLE:
6275 // x <= -1 => vmskltz.{b,h,w,d} x
6276 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6277 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6278 EltVT == MVT::i64))
6279 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6280 break;
6281 case ISD::SETNE:
6282 // x != 0 => vmsknez.b x
6283 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6284 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6285 break;
6286 }
6287
6288 if (Opc == ISD::DELETED_NODE)
6289 return SDValue();
6290
     // The mask node produces a GRLen-wide scalar, which is then zero-extended
     // or truncated to T and bitcast to the requested result type.
     // NOTE(review): the definition of `T` is not visible in this listing.
6291 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
6293 V = DAG.getZExtOrTrunc(V, DL, T);
6294 return DAG.getBitcast(VT, V);
6295}
6296
6299 const LoongArchSubtarget &Subtarget) {
     // Combines a bitcast whose source is a simple vXi1 vector, before operation
     // legalization: the i1 vector is sign-extended to a full-width vector and
     // reduced to a scalar bit mask with [X]VMSKLTZ, which is then resized and
     // bitcast to the result type. The SETCC-specific combine is tried first.
     // NOTE(review): the start of the signature (including the function name)
     // and the definition of `T` are not visible in this listing.
6300 SDLoc DL(N);
6301 EVT VT = N->getValueType(0);
6302 SDValue Src = N->getOperand(0);
6303 EVT SrcVT = Src.getValueType();
6304 MVT GRLenVT = Subtarget.getGRLenVT();
6305
     // Unlike the other combines in this file, this one runs only *before*
     // operation legalization.
6306 if (!DCI.isBeforeLegalizeOps())
6307 return SDValue();
6308
6309 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6310 return SDValue();
6311
6312 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6313 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6314 if (Res)
6315 return Res;
6316
6317 // Generate vXi1 using [X]VMSKLTZ
6318 MVT SExtVT;
6319 unsigned Opc;
6320 bool UseLASX = false;
6321 bool PropagateSExt = false;
6322
     // Give up on a single-use SETCC whose compared operands are wider than
     // 256 bits.
6323 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6324 EVT CmpVT = Src.getOperand(0).getValueType();
6325 if (CmpVT.getSizeInBits() > 256)
6326 return SDValue();
6327 }
6328
     // Pick the sign-extended vector type. The default for each case is a
     // 128-bit type; with LASX, when the source traces back to 256-bit vectors,
     // the 256-bit type is used and the extension is pushed through the bitops.
6329 switch (SrcVT.getSimpleVT().SimpleTy) {
6330 default:
6331 return SDValue();
6332 case MVT::v2i1:
6333 SExtVT = MVT::v2i64;
6334 break;
6335 case MVT::v4i1:
6336 SExtVT = MVT::v4i32;
6337 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6338 SExtVT = MVT::v4i64;
6339 UseLASX = true;
6340 PropagateSExt = true;
6341 }
6342 break;
6343 case MVT::v8i1:
6344 SExtVT = MVT::v8i16;
6345 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6346 SExtVT = MVT::v8i32;
6347 UseLASX = true;
6348 PropagateSExt = true;
6349 }
6350 break;
6351 case MVT::v16i1:
6352 SExtVT = MVT::v16i8;
6353 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6354 SExtVT = MVT::v16i16;
6355 UseLASX = true;
6356 PropagateSExt = true;
6357 }
6358 break;
6359 case MVT::v32i1:
     // v32i8 is 256 bits wide, so this case always requests the LASX form;
     // the fallback below handles targets without it.
6360 SExtVT = MVT::v32i8;
6361 UseLASX = true;
6362 break;
6363 };
6364 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6365 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6366
6367 SDValue V;
     // Without 32S or without LASX, a v32i8 source is split into two halves,
     // each reduced with VMSKLTZ, and the high half's mask is shifted left by
     // 16 and ORed with the low half's mask. Any other LASX-sized request is
     // abandoned.
6368 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6369 if (Src.getSimpleValueType() == MVT::v32i8) {
6370 SDValue Lo, Hi;
6371 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6372 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6373 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6374 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6375 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6376 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6377 } else if (UseLASX) {
6378 return SDValue();
6379 }
6380 }
6381
     // V is still empty unless the split path above produced the mask.
6382 if (!V) {
6383 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6384 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6385 }
6386
     // NOTE(review): the definition of `T` is not visible in this listing.
6388 V = DAG.getZExtOrTrunc(V, DL, T);
6389 return DAG.getBitcast(VT, V);
6390}
6391
6394 const LoongArchSubtarget &Subtarget) {
     // Combines an OR node into a LoongArchISD::BSTRINS bit-field insert when
     // one of the eight patterns documented below matches. Runs only after
     // operation legalization, only with the 32S feature, and only for 32-bit
     // or 64-bit values. Patterns 1-7 are tried with the operands as given and
     // then swapped; pattern 8 is tried afterwards with its own swap-and-retry.
     // Returns an empty SDValue when nothing matches.
     // NOTE(review): the start of the signature (including the function name)
     // is not visible in this listing.
6395 MVT GRLenVT = Subtarget.getGRLenVT();
6396 EVT ValTy = N->getValueType(0);
6397 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6398 ConstantSDNode *CN0, *CN1;
6399 SDLoc DL(N);
6400 unsigned ValBits = ValTy.getSizeInBits();
6401 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6402 unsigned Shamt;
6403 bool SwapAndRetried = false;
6404
6405 // BSTRPICK requires the 32S feature.
6406 if (!Subtarget.has32S())
6407 return SDValue();
6408
6409 if (DCI.isBeforeLegalizeOps())
6410 return SDValue();
6411
6412 if (ValBits != 32 && ValBits != 64)
6413 return SDValue();
6414
6415Retry:
6416 // 1st pattern to match BSTRINS:
6417 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6418 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6419 // =>
6420 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6421 if (N0.getOpcode() == ISD::AND &&
6422 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6423 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6424 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6425 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6426 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6427 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6428 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6429 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6430 (MaskIdx0 + MaskLen0 <= ValBits)) {
6431 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6432 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6433 N1.getOperand(0).getOperand(0),
6434 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6435 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6436 }
6437
6438 // 2nd pattern to match BSTRINS:
6439 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6440 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6441 // =>
6442 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6443 if (N0.getOpcode() == ISD::AND &&
6444 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6445 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6446 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6447 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6448 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6449 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6450 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6451 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6452 (MaskIdx0 + MaskLen0 <= ValBits)) {
6453 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6454 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6455 N1.getOperand(0).getOperand(0),
6456 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6457 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6458 }
6459
6460 // 3rd pattern to match BSTRINS:
6461 // R = or (and X, mask0), (and Y, mask1)
6462 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6463 // =>
6464 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6465 // where msb = lsb + size - 1
     // NOTE(review): unlike patterns 1, 2 and 4, the field is bounded by 64
     // rather than ValBits here, and for 32-bit values MaskLen0 is masked with
     // 31 when computing msb -- confirm this asymmetry is intentional.
6466 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6467 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6468 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6469 (MaskIdx0 + MaskLen0 <= 64) &&
6470 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6471 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6472 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6473 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6474 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6475 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6476 DAG.getConstant(ValBits == 32
6477 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6478 : (MaskIdx0 + MaskLen0 - 1),
6479 DL, GRLenVT),
6480 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6481 }
6482
6483 // 4th pattern to match BSTRINS:
6484 // R = or (and X, mask), (shl Y, shamt)
6485 // where mask = (2**shamt - 1)
6486 // =>
6487 // R = BSTRINS X, Y, ValBits - 1, shamt
6488 // where ValBits = 32 or 64
6489 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6490 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6491 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6492 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6493 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6494 (MaskIdx0 + MaskLen0 <= ValBits)) {
6495 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6496 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6497 N1.getOperand(0),
6498 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6499 DAG.getConstant(Shamt, DL, GRLenVT));
6500 }
6501
6502 // 5th pattern to match BSTRINS:
6503 // R = or (and X, mask), const
6504 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6505 // =>
6506 // R = BSTRINS X, (const >> lsb), msb, lsb
6507 // where msb = lsb + size - 1
     // NOTE(review): as in pattern 3, msb uses (MaskLen0 & 31) for 32-bit
     // values; this pattern has no explicit bound on MaskIdx0 + MaskLen0.
6508 if (N0.getOpcode() == ISD::AND &&
6509 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6510 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6511 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6512 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6513 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6514 return DAG.getNode(
6515 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6516 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6517 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6518 : (MaskIdx0 + MaskLen0 - 1),
6519 DL, GRLenVT),
6520 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6521 }
6522
6523 // 6th pattern.
6524 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6525 // by the incoming bits are known to be zero.
6526 // =>
6527 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6528 //
6529 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6530 // pattern is more common than the 1st. So we put the 1st before the 6th in
6531 // order to match as many nodes as possible.
6532 ConstantSDNode *CNMask, *CNShamt;
6533 unsigned MaskIdx, MaskLen;
6534 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6535 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6536 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6537 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6538 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6539 Shamt = CNShamt->getZExtValue();
     // ShMask marks the bits the inserted field will occupy in the result;
     // they must all be known zero in N0.
6540 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6541 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6542 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6543 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6544 N1.getOperand(0).getOperand(0),
6545 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6546 DAG.getConstant(Shamt, DL, GRLenVT));
6547 }
6548 }
6549
6550 // 7th pattern.
6551 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6552 // overwritten by the incoming bits are known to be zero.
6553 // =>
6554 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6555 //
6556 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6557 // before the 7th in order to match as many nodes as possible.
6558 if (N1.getOpcode() == ISD::AND &&
6559 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6560 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6561 N1.getOperand(0).getOpcode() == ISD::SHL &&
6562 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6563 CNShamt->getZExtValue() == MaskIdx) {
6564 APInt ShMask(ValBits, CNMask->getZExtValue());
6565 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6566 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6567 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6568 N1.getOperand(0).getOperand(0),
6569 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6570 DAG.getConstant(MaskIdx, DL, GRLenVT));
6571 }
6572 }
6573
6574 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6575 if (!SwapAndRetried) {
6576 std::swap(N0, N1);
6577 SwapAndRetried = true;
6578 goto Retry;
6579 }
6580
     // Reset the flag so that pattern 8 gets its own swap-and-retry pass. N0 and
     // N1 are still in the swapped order left by the retry above.
6581 SwapAndRetried = false;
6582Retry2:
6583 // 8th pattern.
6584 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6585 // the incoming bits are known to be zero.
6586 // =>
6587 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6588 //
6589 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6590 // we put it here in order to match as many nodes as possible or generate less
6591 // instructions.
6592 if (N1.getOpcode() == ISD::AND &&
6593 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6594 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6595 APInt ShMask(ValBits, CNMask->getZExtValue());
6596 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6597 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6598 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6599 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6600 N1->getOperand(0),
6601 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6602 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6603 DAG.getConstant(MaskIdx, DL, GRLenVT));
6604 }
6605 }
6606 // Swap N0/N1 and retry.
6607 if (!SwapAndRetried) {
6608 std::swap(N0, N1);
6609 SwapAndRetried = true;
6610 goto Retry2;
6611 }
6612
6613 return SDValue();
6614}
6615
6616static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6617 ExtType = ISD::NON_EXTLOAD;
6618
6619 switch (V.getNode()->getOpcode()) {
6620 case ISD::LOAD: {
6621 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6622 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6623 (LoadNode->getMemoryVT() == MVT::i16)) {
6624 ExtType = LoadNode->getExtensionType();
6625 return true;
6626 }
6627 return false;
6628 }
6629 case ISD::AssertSext: {
6630 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6631 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6632 ExtType = ISD::SEXTLOAD;
6633 return true;
6634 }
6635 return false;
6636 }
6637 case ISD::AssertZext: {
6638 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6639 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6640 ExtType = ISD::ZEXTLOAD;
6641 return true;
6642 }
6643 return false;
6644 }
6645 default:
6646 return false;
6647 }
6648
6649 return false;
6650}
6651
6652// Eliminate redundant truncation and zero-extension nodes.
6653// * Case 1:
6654// +------------+ +------------+ +------------+
6655// | Input1 | | Input2 | | CC |
6656// +------------+ +------------+ +------------+
6657// | | |
6658// V V +----+
6659// +------------+ +------------+ |
6660// | TRUNCATE | | TRUNCATE | |
6661// +------------+ +------------+ |
6662// | | |
6663// V V |
6664// +------------+ +------------+ |
6665// | ZERO_EXT | | ZERO_EXT | |
6666// +------------+ +------------+ |
6667// | | |
6668// | +-------------+ |
6669// V V | |
6670// +----------------+ | |
6671// | AND | | |
6672// +----------------+ | |
6673// | | |
6674// +---------------+ | |
6675// | | |
6676// V V V
6677// +-------------+
6678// | CMP |
6679// +-------------+
6680// * Case 2:
6681// +------------+ +------------+ +-------------+ +------------+ +------------+
6682// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6683// +------------+ +------------+ +-------------+ +------------+ +------------+
6684// | | | | |
6685// V | | | |
6686// +------------+ | | | |
6687// | XOR |<---------------------+ | |
6688// +------------+ | | |
6689// | | | |
6690// V V +---------------+ |
6691// +------------+ +------------+ | |
6692// | TRUNCATE | | TRUNCATE | | +-------------------------+
6693// +------------+ +------------+ | |
6694// | | | |
6695// V V | |
6696// +------------+ +------------+ | |
6697// | ZERO_EXT | | ZERO_EXT | | |
6698// +------------+ +------------+ | |
6699// | | | |
6700// V V | |
6701// +----------------+ | |
6702// | AND | | |
6703// +----------------+ | |
6704// | | |
6705// +---------------+ | |
6706// | | |
6707// V V V
6708// +-------------+
6709// | CMP |
6710// +-------------+
6713 const LoongArchSubtarget &Subtarget) {
     // Operand 2 of the node being combined holds the condition code.
6714 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6715
     // The first compared value must be an AND ...
6716 SDNode *AndNode = N->getOperand(0).getNode();
6717 if (AndNode->getOpcode() != ISD::AND)
6718 return SDValue();
6719
     // ... whose second input is a ZERO_EXTEND.
6720 SDValue AndInputValue2 = AndNode->getOperand(1);
6721 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6722 return SDValue();
6723
6724 SDValue CmpInputValue = N->getOperand(1);
6725 SDValue AndInputValue1 = AndNode->getOperand(0);
6726 if (AndInputValue1.getOpcode() == ISD::XOR) {
     // XOR form: only eq/ne are accepted, the XOR's second operand must be the
     // all-ones constant and the compare's second operand must be zero. The
     // XOR is then looked through; its first operand must be a ZERO_EXTEND.
6727 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6728 return SDValue();
6729 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6730 if (!CN || !CN->isAllOnes())
6731 return SDValue();
6732 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6733 if (!CN || !CN->isZero())
6734 return SDValue();
6735 AndInputValue1 = AndInputValue1.getOperand(0);
6736 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6737 return SDValue();
6738 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
     // Plain form: the AND's second input must be the very value the AND
     // result is compared against.
6739 if (AndInputValue2 != CmpInputValue)
6740 return SDValue();
6741 } else {
6742 return SDValue();
6743 }
6744
     // Each ZERO_EXTEND must be fed by a TRUNCATE.
6745 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6746 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6747 return SDValue();
6748
6749 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6750 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6751 return SDValue();
6752
6753 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6754 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6755 ISD::LoadExtType ExtType1;
6756 ISD::LoadExtType ExtType2;
6757
     // Both pre-truncate values must be accepted by checkValueWidth(), which
     // also reports how each one is extended.
6758 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6759 !checkValueWidth(TruncInputValue2, ExtType2))
6760 return SDValue();
6761
     // The two pre-truncate values and the AND result must share one type so
     // the values can be substituted directly into a new AND.
6762 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6763 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6764 return SDValue();
6765
     // Give up only when ExtType2 is neither ZEXTLOAD nor SEXTLOAD and
     // ExtType1 is not SEXTLOAD.
6766 if ((ExtType2 != ISD::ZEXTLOAD) &&
6767 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6768 return SDValue();
6769
6770 // These truncation and zero-extension nodes are not necessary, remove them.
     // The new compare is (a & b) <CC> b in both forms. For the XOR form this
     // relies on (~a & b) ==/!= 0 being equivalent to (a & b) ==/!= b.
6771 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6772 TruncInputValue1, TruncInputValue2);
6773 SDValue NewSetCC =
6774 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
     // Redirect every use of N to the new setcc and hand N itself back.
6775 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6776 return SDValue(N, 0);
6777}
6778
6779// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
6782 const LoongArchSubtarget &Subtarget) {
     // Do nothing while the combiner is still in its before-legalize-ops phase.
6783 if (DCI.isBeforeLegalizeOps())
6784 return SDValue();
6785
     // The fold applies only when N's operand 0 is a REVB_2W node.
6786 SDValue Src = N->getOperand(0);
6787 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6788 return SDValue();
6789
     // Emit a single BITREV_4B on REVB_2W's own operand, keeping N's result
     // type.
6790 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6791 Src.getOperand(0));
6792}
6793
6794// Perform common combines for BR_CC and SELECT_CC conditions.
6795static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6796 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6797 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6798
6799 // As far as arithmetic right shift always saves the sign,
6800 // shift can be omitted.
6801 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6802 // setge (sra X, N), 0 -> setge X, 0
6803 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6804 LHS.getOpcode() == ISD::SRA) {
6805 LHS = LHS.getOperand(0);
6806 return true;
6807 }
6808
6809 if (!ISD::isIntEqualitySetCC(CCVal))
6810 return false;
6811
6812 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6813 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6814 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6815 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6816 // If we're looking for eq 0 instead of ne 0, we need to invert the
6817 // condition.
6818 bool Invert = CCVal == ISD::SETEQ;
6819 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6820 if (Invert)
6821 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6822
6823 RHS = LHS.getOperand(1);
6824 LHS = LHS.getOperand(0);
6825 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6826
6827 CC = DAG.getCondCode(CCVal);
6828 return true;
6829 }
6830
6831 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6832 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6833 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6834 SDValue LHS0 = LHS.getOperand(0);
6835 if (LHS0.getOpcode() == ISD::AND &&
6836 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6837 uint64_t Mask = LHS0.getConstantOperandVal(1);
6838 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6839 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6840 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6841 CC = DAG.getCondCode(CCVal);
6842
6843 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6844 LHS = LHS0.getOperand(0);
6845 if (ShAmt != 0)
6846 LHS =
6847 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6848 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6849 return true;
6850 }
6851 }
6852 }
6853
6854 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6855 // This can occur when legalizing some floating point comparisons.
6856 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6857 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6858 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6859 CC = DAG.getCondCode(CCVal);
6860 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6861 return true;
6862 }
6863
6864 return false;
6865}
6866
6869 const LoongArchSubtarget &Subtarget) {
     // Operands 1, 2 and 3 of N are the compared values and the condition
     // code. Operands 0 and 4 are forwarded unchanged below (presumably the
     // chain and the branch destination -- TODO confirm).
6870 SDValue LHS = N->getOperand(1);
6871 SDValue RHS = N->getOperand(2);
6872 SDValue CC = N->getOperand(3);
6873 SDLoc DL(N);
6874
     // combine_CC() rewrites LHS/RHS/CC in place and returns true when it
     // simplified the condition; only then is a replacement BR_CC built.
6875 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6876 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6877 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6878
     // No simplification applied.
6879 return SDValue();
6880}
6881
6884 const LoongArchSubtarget &Subtarget) {
6885 // Operands of N: 0 = LHS, 1 = RHS, 2 = condition code, 3 = true value,
     // 4 = false value.
6886 SDValue LHS = N->getOperand(0);
6887 SDValue RHS = N->getOperand(1);
6888 SDValue CC = N->getOperand(2);
6889 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6890 SDValue TrueV = N->getOperand(3);
6891 SDValue FalseV = N->getOperand(4);
6892 SDLoc DL(N);
6893 EVT VT = N->getValueType(0);
6894
6895 // If the True and False values are the same, we don't need a select_cc.
6896 if (TrueV == FalseV)
6897 return TrueV;
6898
6899 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6900 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
     // NOTE(review): the formulas above compare x against zero, but no test of
     // RHS is visible in the condition below -- confirm the guard is complete.
6901 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6903 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
     // Handle SETGE as SETLT with the two results exchanged.
6904 if (CCVal == ISD::CondCode::SETGE)
6905 std::swap(TrueV, FalseV);
6906
6907 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6908 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6909 // Only handle simm12, if it is not in this range, it can be considered as
6910 // register.
6911 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6912 isInt<12>(TrueSImm - FalseSImm)) {
     // SRA by GRLen-1 yields all-ones when LHS is negative and zero
     // otherwise; AND-ing that with (True - False) and adding False selects
     // True or False respectively.
6913 SDValue SRA =
6914 DAG.getNode(ISD::SRA, DL, VT, LHS,
6915 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6916 SDValue AND =
6917 DAG.getNode(ISD::AND, DL, VT, SRA,
6918 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6919 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6920 }
6921
     // The arithmetic form was not used: undo the exchange above so the
     // generic path below sees TrueV/FalseV in their original order.
6922 if (CCVal == ISD::CondCode::SETGE)
6923 std::swap(TrueV, FalseV);
6924 }
6925
     // Otherwise try the shared condition folds; a new SELECT_CC is built only
     // when combine_CC() changed LHS/RHS/CC.
6926 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6927 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6928 {LHS, RHS, CC, TrueV, FalseV});
6929
6930 return SDValue();
6931}
6932
// Range-check the constant found at operand ImmOp of Node against N bits and
// return it as a constant of the subtarget's GRLen type.
//
// The operand is taken to be a ConstantSDNode (cast<>). With IsSigned the
// sign-extended value must fit a signed N-bit integer; otherwise the
// zero-extended value must fit an unsigned N-bit integer. On failure an
// "argument out of range." error is emitted on the context and an UNDEF node
// of GRLen type is returned.
6933template <unsigned N>
6935 SelectionDAG &DAG,
6936 const LoongArchSubtarget &Subtarget,
6937 bool IsSigned = false) {
6938 SDLoc DL(Node);
6939 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6940 // Check the ImmArg.
6941 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6942 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6943 DAG.getContext()->emitError(Node->getOperationName(0) +
6944 ": argument out of range.");
6945 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6946 }
     // NOTE(review): the accepted value is rebuilt from getZExtValue() even
     // when IsSigned is set -- confirm this is intended for negative
     // immediates whose type is narrower than GRLen.
6947 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6948}
6949
// Range-check the constant found at operand ImmOp of Node against N bits and
// return it as a constant of Node's result type.
//
// The operand is taken to be a ConstantSDNode (cast<>). With IsSigned the
// sign-extended value must fit a signed N-bit integer; otherwise the
// zero-extended value must fit an unsigned N-bit integer. On failure an
// "argument out of range." error is emitted on the context and an UNDEF node
// of the result type is returned.
6950template <unsigned N>
6951static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6952 SelectionDAG &DAG, bool IsSigned = false) {
6953 SDLoc DL(Node);
6954 EVT ResTy = Node->getValueType(0);
6955 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6956
6957 // Check the ImmArg.
6958 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6959 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6960 DAG.getContext()->emitError(Node->getOperationName(0) +
6961 ": argument out of range.");
6962 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6963 }
     // The value is read with the signedness that matches IsSigned.
6964 return DAG.getConstant(
6966 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6967 DL, ResTy);
6968}
6969
6971 SDLoc DL(Node);
6972 EVT ResTy = Node->getValueType(0);
     // Returns operand 2 of Node AND-ed with (scalar bit width - 1). Callers
     // in this file pass the result as a shift amount.
6973 SDValue Vec = Node->getOperand(2);
6974 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6975 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6976}
6977
6979 SDLoc DL(Node);
6980 EVT ResTy = Node->getValueType(0);
     // Bit = 1 << truncateVecElts(Node), i.e. a one shifted by the masked
     // value of operand 2.
6981 SDValue One = DAG.getConstant(1, DL, ResTy);
6982 SDValue Bit =
6983 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6984
     // Clear that bit in operand 1: operand1 & ~Bit.
6985 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6986 DAG.getNOT(DL, Bit, ResTy));
6987}
6988
// Lower a "clear bit <imm>" node: the result is operand 1 AND-ed with a
// constant that has every bit set except bit <imm>. <imm> is operand 2, taken
// to be a ConstantSDNode (cast<>), and must fit in N unsigned bits; otherwise
// an "argument out of range." error is emitted and UNDEF of the result type is
// returned.
6989template <unsigned N>
6991 SDLoc DL(Node);
6992 EVT ResTy = Node->getValueType(0);
6993 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6994 // Check the unsigned ImmArg.
6995 if (!isUInt<N>(CImm->getZExtValue())) {
6996 DAG.getContext()->emitError(Node->getOperationName(0) +
6997 ": argument out of range.");
6998 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6999 }
7000
     // BitImm = 1 << imm at the result's scalar width; the mask is its
     // complement.
7001 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7002 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
7003
7004 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
7005}
7006
// Lower a "set bit <imm>" node: the result is operand 1 OR-ed with a constant
// that has only bit <imm> set. <imm> is operand 2, taken to be a
// ConstantSDNode (cast<>), and must fit in N unsigned bits; otherwise an
// "argument out of range." error is emitted and UNDEF of the result type is
// returned.
7007template <unsigned N>
7009 SDLoc DL(Node);
7010 EVT ResTy = Node->getValueType(0);
7011 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7012 // Check the unsigned ImmArg.
7013 if (!isUInt<N>(CImm->getZExtValue())) {
7014 DAG.getContext()->emitError(Node->getOperationName(0) +
7015 ": argument out of range.");
7016 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7017 }
7018
     // Imm = 1 << imm at the result's scalar width.
7019 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7020 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7021 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
7022}
7023
// Lower a "flip bit <imm>" node: the result is operand 1 XOR-ed with a
// constant that has only bit <imm> set. <imm> is operand 2, taken to be a
// ConstantSDNode (cast<>), and must fit in N unsigned bits; otherwise an
// "argument out of range." error is emitted and UNDEF of the result type is
// returned.
7024template <unsigned N>
7026 SDLoc DL(Node);
7027 EVT ResTy = Node->getValueType(0);
7028 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7029 // Check the unsigned ImmArg.
7030 if (!isUInt<N>(CImm->getZExtValue())) {
7031 DAG.getContext()->emitError(Node->getOperationName(0) +
7032 ": argument out of range.");
7033 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7034 }
7035
     // Imm = 1 << imm at the result's scalar width.
7036 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7037 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7038 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
7039}
7040
// Build a ResOp node from N's vector operand (operand 1) and a constant index
// (operand 2). The index must fit in W unsigned bits; otherwise an
// "argument out of range." error is emitted and UNDEF of N's result type is
// returned.
7041template <unsigned W>
7043 unsigned ResOp) {
     // NOTE(review): Imm is declared `unsigned`; if getConstantOperandVal()
     // yields a wider value it is narrowed before the range check below --
     // confirm oversized immediates cannot slip through.
7044 unsigned Imm = N->getConstantOperandVal(2);
7045 if (!isUInt<W>(Imm)) {
7046 const StringRef ErrorMsg = "argument out of range";
7047 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
7048 return DAG.getUNDEF(N->getValueType(0));
7049 }
7050 SDLoc DL(N);
7051 SDValue Vec = N->getOperand(1);
     // The index is materialized as an i32 constant.
7052 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
     // NOTE(review): EltVT is used below but its definition is not among the
     // visible lines -- confirm what it holds.
7054 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
7055}
7056
7057static SDValue
7060 const LoongArchSubtarget &Subtarget) {
7061 SDLoc DL(N);
7062 switch (N->getConstantOperandVal(0)) {
7063 default:
7064 break;
7065 case Intrinsic::loongarch_lsx_vadd_b:
7066 case Intrinsic::loongarch_lsx_vadd_h:
7067 case Intrinsic::loongarch_lsx_vadd_w:
7068 case Intrinsic::loongarch_lsx_vadd_d:
7069 case Intrinsic::loongarch_lasx_xvadd_b:
7070 case Intrinsic::loongarch_lasx_xvadd_h:
7071 case Intrinsic::loongarch_lasx_xvadd_w:
7072 case Intrinsic::loongarch_lasx_xvadd_d:
7073 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7074 N->getOperand(2));
7075 case Intrinsic::loongarch_lsx_vaddi_bu:
7076 case Intrinsic::loongarch_lsx_vaddi_hu:
7077 case Intrinsic::loongarch_lsx_vaddi_wu:
7078 case Intrinsic::loongarch_lsx_vaddi_du:
7079 case Intrinsic::loongarch_lasx_xvaddi_bu:
7080 case Intrinsic::loongarch_lasx_xvaddi_hu:
7081 case Intrinsic::loongarch_lasx_xvaddi_wu:
7082 case Intrinsic::loongarch_lasx_xvaddi_du:
7083 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7084 lowerVectorSplatImm<5>(N, 2, DAG));
7085 case Intrinsic::loongarch_lsx_vsub_b:
7086 case Intrinsic::loongarch_lsx_vsub_h:
7087 case Intrinsic::loongarch_lsx_vsub_w:
7088 case Intrinsic::loongarch_lsx_vsub_d:
7089 case Intrinsic::loongarch_lasx_xvsub_b:
7090 case Intrinsic::loongarch_lasx_xvsub_h:
7091 case Intrinsic::loongarch_lasx_xvsub_w:
7092 case Intrinsic::loongarch_lasx_xvsub_d:
7093 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7094 N->getOperand(2));
7095 case Intrinsic::loongarch_lsx_vsubi_bu:
7096 case Intrinsic::loongarch_lsx_vsubi_hu:
7097 case Intrinsic::loongarch_lsx_vsubi_wu:
7098 case Intrinsic::loongarch_lsx_vsubi_du:
7099 case Intrinsic::loongarch_lasx_xvsubi_bu:
7100 case Intrinsic::loongarch_lasx_xvsubi_hu:
7101 case Intrinsic::loongarch_lasx_xvsubi_wu:
7102 case Intrinsic::loongarch_lasx_xvsubi_du:
7103 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7104 lowerVectorSplatImm<5>(N, 2, DAG));
7105 case Intrinsic::loongarch_lsx_vneg_b:
7106 case Intrinsic::loongarch_lsx_vneg_h:
7107 case Intrinsic::loongarch_lsx_vneg_w:
7108 case Intrinsic::loongarch_lsx_vneg_d:
7109 case Intrinsic::loongarch_lasx_xvneg_b:
7110 case Intrinsic::loongarch_lasx_xvneg_h:
7111 case Intrinsic::loongarch_lasx_xvneg_w:
7112 case Intrinsic::loongarch_lasx_xvneg_d:
7113 return DAG.getNode(
7114 ISD::SUB, DL, N->getValueType(0),
7115 DAG.getConstant(
7116 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7117 /*isSigned=*/true),
7118 SDLoc(N), N->getValueType(0)),
7119 N->getOperand(1));
7120 case Intrinsic::loongarch_lsx_vmax_b:
7121 case Intrinsic::loongarch_lsx_vmax_h:
7122 case Intrinsic::loongarch_lsx_vmax_w:
7123 case Intrinsic::loongarch_lsx_vmax_d:
7124 case Intrinsic::loongarch_lasx_xvmax_b:
7125 case Intrinsic::loongarch_lasx_xvmax_h:
7126 case Intrinsic::loongarch_lasx_xvmax_w:
7127 case Intrinsic::loongarch_lasx_xvmax_d:
7128 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7129 N->getOperand(2));
7130 case Intrinsic::loongarch_lsx_vmax_bu:
7131 case Intrinsic::loongarch_lsx_vmax_hu:
7132 case Intrinsic::loongarch_lsx_vmax_wu:
7133 case Intrinsic::loongarch_lsx_vmax_du:
7134 case Intrinsic::loongarch_lasx_xvmax_bu:
7135 case Intrinsic::loongarch_lasx_xvmax_hu:
7136 case Intrinsic::loongarch_lasx_xvmax_wu:
7137 case Intrinsic::loongarch_lasx_xvmax_du:
7138 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7139 N->getOperand(2));
7140 case Intrinsic::loongarch_lsx_vmaxi_b:
7141 case Intrinsic::loongarch_lsx_vmaxi_h:
7142 case Intrinsic::loongarch_lsx_vmaxi_w:
7143 case Intrinsic::loongarch_lsx_vmaxi_d:
7144 case Intrinsic::loongarch_lasx_xvmaxi_b:
7145 case Intrinsic::loongarch_lasx_xvmaxi_h:
7146 case Intrinsic::loongarch_lasx_xvmaxi_w:
7147 case Intrinsic::loongarch_lasx_xvmaxi_d:
7148 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7149 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7150 case Intrinsic::loongarch_lsx_vmaxi_bu:
7151 case Intrinsic::loongarch_lsx_vmaxi_hu:
7152 case Intrinsic::loongarch_lsx_vmaxi_wu:
7153 case Intrinsic::loongarch_lsx_vmaxi_du:
7154 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7155 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7156 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7157 case Intrinsic::loongarch_lasx_xvmaxi_du:
7158 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7159 lowerVectorSplatImm<5>(N, 2, DAG));
7160 case Intrinsic::loongarch_lsx_vmin_b:
7161 case Intrinsic::loongarch_lsx_vmin_h:
7162 case Intrinsic::loongarch_lsx_vmin_w:
7163 case Intrinsic::loongarch_lsx_vmin_d:
7164 case Intrinsic::loongarch_lasx_xvmin_b:
7165 case Intrinsic::loongarch_lasx_xvmin_h:
7166 case Intrinsic::loongarch_lasx_xvmin_w:
7167 case Intrinsic::loongarch_lasx_xvmin_d:
7168 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7169 N->getOperand(2));
7170 case Intrinsic::loongarch_lsx_vmin_bu:
7171 case Intrinsic::loongarch_lsx_vmin_hu:
7172 case Intrinsic::loongarch_lsx_vmin_wu:
7173 case Intrinsic::loongarch_lsx_vmin_du:
7174 case Intrinsic::loongarch_lasx_xvmin_bu:
7175 case Intrinsic::loongarch_lasx_xvmin_hu:
7176 case Intrinsic::loongarch_lasx_xvmin_wu:
7177 case Intrinsic::loongarch_lasx_xvmin_du:
7178 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7179 N->getOperand(2));
7180 case Intrinsic::loongarch_lsx_vmini_b:
7181 case Intrinsic::loongarch_lsx_vmini_h:
7182 case Intrinsic::loongarch_lsx_vmini_w:
7183 case Intrinsic::loongarch_lsx_vmini_d:
7184 case Intrinsic::loongarch_lasx_xvmini_b:
7185 case Intrinsic::loongarch_lasx_xvmini_h:
7186 case Intrinsic::loongarch_lasx_xvmini_w:
7187 case Intrinsic::loongarch_lasx_xvmini_d:
7188 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7189 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7190 case Intrinsic::loongarch_lsx_vmini_bu:
7191 case Intrinsic::loongarch_lsx_vmini_hu:
7192 case Intrinsic::loongarch_lsx_vmini_wu:
7193 case Intrinsic::loongarch_lsx_vmini_du:
7194 case Intrinsic::loongarch_lasx_xvmini_bu:
7195 case Intrinsic::loongarch_lasx_xvmini_hu:
7196 case Intrinsic::loongarch_lasx_xvmini_wu:
7197 case Intrinsic::loongarch_lasx_xvmini_du:
7198 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7199 lowerVectorSplatImm<5>(N, 2, DAG));
7200 case Intrinsic::loongarch_lsx_vmul_b:
7201 case Intrinsic::loongarch_lsx_vmul_h:
7202 case Intrinsic::loongarch_lsx_vmul_w:
7203 case Intrinsic::loongarch_lsx_vmul_d:
7204 case Intrinsic::loongarch_lasx_xvmul_b:
7205 case Intrinsic::loongarch_lasx_xvmul_h:
7206 case Intrinsic::loongarch_lasx_xvmul_w:
7207 case Intrinsic::loongarch_lasx_xvmul_d:
7208 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7209 N->getOperand(2));
7210 case Intrinsic::loongarch_lsx_vmadd_b:
7211 case Intrinsic::loongarch_lsx_vmadd_h:
7212 case Intrinsic::loongarch_lsx_vmadd_w:
7213 case Intrinsic::loongarch_lsx_vmadd_d:
7214 case Intrinsic::loongarch_lasx_xvmadd_b:
7215 case Intrinsic::loongarch_lasx_xvmadd_h:
7216 case Intrinsic::loongarch_lasx_xvmadd_w:
7217 case Intrinsic::loongarch_lasx_xvmadd_d: {
7218 EVT ResTy = N->getValueType(0);
7219 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7220 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7221 N->getOperand(3)));
7222 }
7223 case Intrinsic::loongarch_lsx_vmsub_b:
7224 case Intrinsic::loongarch_lsx_vmsub_h:
7225 case Intrinsic::loongarch_lsx_vmsub_w:
7226 case Intrinsic::loongarch_lsx_vmsub_d:
7227 case Intrinsic::loongarch_lasx_xvmsub_b:
7228 case Intrinsic::loongarch_lasx_xvmsub_h:
7229 case Intrinsic::loongarch_lasx_xvmsub_w:
7230 case Intrinsic::loongarch_lasx_xvmsub_d: {
7231 EVT ResTy = N->getValueType(0);
7232 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7233 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7234 N->getOperand(3)));
7235 }
7236 case Intrinsic::loongarch_lsx_vdiv_b:
7237 case Intrinsic::loongarch_lsx_vdiv_h:
7238 case Intrinsic::loongarch_lsx_vdiv_w:
7239 case Intrinsic::loongarch_lsx_vdiv_d:
7240 case Intrinsic::loongarch_lasx_xvdiv_b:
7241 case Intrinsic::loongarch_lasx_xvdiv_h:
7242 case Intrinsic::loongarch_lasx_xvdiv_w:
7243 case Intrinsic::loongarch_lasx_xvdiv_d:
7244 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7245 N->getOperand(2));
7246 case Intrinsic::loongarch_lsx_vdiv_bu:
7247 case Intrinsic::loongarch_lsx_vdiv_hu:
7248 case Intrinsic::loongarch_lsx_vdiv_wu:
7249 case Intrinsic::loongarch_lsx_vdiv_du:
7250 case Intrinsic::loongarch_lasx_xvdiv_bu:
7251 case Intrinsic::loongarch_lasx_xvdiv_hu:
7252 case Intrinsic::loongarch_lasx_xvdiv_wu:
7253 case Intrinsic::loongarch_lasx_xvdiv_du:
7254 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7255 N->getOperand(2));
7256 case Intrinsic::loongarch_lsx_vmod_b:
7257 case Intrinsic::loongarch_lsx_vmod_h:
7258 case Intrinsic::loongarch_lsx_vmod_w:
7259 case Intrinsic::loongarch_lsx_vmod_d:
7260 case Intrinsic::loongarch_lasx_xvmod_b:
7261 case Intrinsic::loongarch_lasx_xvmod_h:
7262 case Intrinsic::loongarch_lasx_xvmod_w:
7263 case Intrinsic::loongarch_lasx_xvmod_d:
7264 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7265 N->getOperand(2));
7266 case Intrinsic::loongarch_lsx_vmod_bu:
7267 case Intrinsic::loongarch_lsx_vmod_hu:
7268 case Intrinsic::loongarch_lsx_vmod_wu:
7269 case Intrinsic::loongarch_lsx_vmod_du:
7270 case Intrinsic::loongarch_lasx_xvmod_bu:
7271 case Intrinsic::loongarch_lasx_xvmod_hu:
7272 case Intrinsic::loongarch_lasx_xvmod_wu:
7273 case Intrinsic::loongarch_lasx_xvmod_du:
7274 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7275 N->getOperand(2));
7276 case Intrinsic::loongarch_lsx_vand_v:
7277 case Intrinsic::loongarch_lasx_xvand_v:
7278 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7279 N->getOperand(2));
7280 case Intrinsic::loongarch_lsx_vor_v:
7281 case Intrinsic::loongarch_lasx_xvor_v:
7282 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7283 N->getOperand(2));
7284 case Intrinsic::loongarch_lsx_vxor_v:
7285 case Intrinsic::loongarch_lasx_xvxor_v:
7286 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7287 N->getOperand(2));
7288 case Intrinsic::loongarch_lsx_vnor_v:
7289 case Intrinsic::loongarch_lasx_xvnor_v: {
7290 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7291 N->getOperand(2));
7292 return DAG.getNOT(DL, Res, Res->getValueType(0));
7293 }
7294 case Intrinsic::loongarch_lsx_vandi_b:
7295 case Intrinsic::loongarch_lasx_xvandi_b:
7296 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7297 lowerVectorSplatImm<8>(N, 2, DAG));
7298 case Intrinsic::loongarch_lsx_vori_b:
7299 case Intrinsic::loongarch_lasx_xvori_b:
7300 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7301 lowerVectorSplatImm<8>(N, 2, DAG));
7302 case Intrinsic::loongarch_lsx_vxori_b:
7303 case Intrinsic::loongarch_lasx_xvxori_b:
7304 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7305 lowerVectorSplatImm<8>(N, 2, DAG));
7306 case Intrinsic::loongarch_lsx_vsll_b:
7307 case Intrinsic::loongarch_lsx_vsll_h:
7308 case Intrinsic::loongarch_lsx_vsll_w:
7309 case Intrinsic::loongarch_lsx_vsll_d:
7310 case Intrinsic::loongarch_lasx_xvsll_b:
7311 case Intrinsic::loongarch_lasx_xvsll_h:
7312 case Intrinsic::loongarch_lasx_xvsll_w:
7313 case Intrinsic::loongarch_lasx_xvsll_d:
7314 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7315 truncateVecElts(N, DAG));
7316 case Intrinsic::loongarch_lsx_vslli_b:
7317 case Intrinsic::loongarch_lasx_xvslli_b:
7318 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7319 lowerVectorSplatImm<3>(N, 2, DAG));
7320 case Intrinsic::loongarch_lsx_vslli_h:
7321 case Intrinsic::loongarch_lasx_xvslli_h:
7322 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7323 lowerVectorSplatImm<4>(N, 2, DAG));
7324 case Intrinsic::loongarch_lsx_vslli_w:
7325 case Intrinsic::loongarch_lasx_xvslli_w:
7326 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7327 lowerVectorSplatImm<5>(N, 2, DAG));
7328 case Intrinsic::loongarch_lsx_vslli_d:
7329 case Intrinsic::loongarch_lasx_xvslli_d:
7330 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7331 lowerVectorSplatImm<6>(N, 2, DAG));
7332 case Intrinsic::loongarch_lsx_vsrl_b:
7333 case Intrinsic::loongarch_lsx_vsrl_h:
7334 case Intrinsic::loongarch_lsx_vsrl_w:
7335 case Intrinsic::loongarch_lsx_vsrl_d:
7336 case Intrinsic::loongarch_lasx_xvsrl_b:
7337 case Intrinsic::loongarch_lasx_xvsrl_h:
7338 case Intrinsic::loongarch_lasx_xvsrl_w:
7339 case Intrinsic::loongarch_lasx_xvsrl_d:
7340 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7341 truncateVecElts(N, DAG));
7342 case Intrinsic::loongarch_lsx_vsrli_b:
7343 case Intrinsic::loongarch_lasx_xvsrli_b:
7344 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7345 lowerVectorSplatImm<3>(N, 2, DAG));
7346 case Intrinsic::loongarch_lsx_vsrli_h:
7347 case Intrinsic::loongarch_lasx_xvsrli_h:
7348 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7349 lowerVectorSplatImm<4>(N, 2, DAG));
7350 case Intrinsic::loongarch_lsx_vsrli_w:
7351 case Intrinsic::loongarch_lasx_xvsrli_w:
7352 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7353 lowerVectorSplatImm<5>(N, 2, DAG));
7354 case Intrinsic::loongarch_lsx_vsrli_d:
7355 case Intrinsic::loongarch_lasx_xvsrli_d:
7356 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7357 lowerVectorSplatImm<6>(N, 2, DAG));
7358 case Intrinsic::loongarch_lsx_vsra_b:
7359 case Intrinsic::loongarch_lsx_vsra_h:
7360 case Intrinsic::loongarch_lsx_vsra_w:
7361 case Intrinsic::loongarch_lsx_vsra_d:
7362 case Intrinsic::loongarch_lasx_xvsra_b:
7363 case Intrinsic::loongarch_lasx_xvsra_h:
7364 case Intrinsic::loongarch_lasx_xvsra_w:
7365 case Intrinsic::loongarch_lasx_xvsra_d:
7366 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7367 truncateVecElts(N, DAG));
7368 case Intrinsic::loongarch_lsx_vsrai_b:
7369 case Intrinsic::loongarch_lasx_xvsrai_b:
7370 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7371 lowerVectorSplatImm<3>(N, 2, DAG));
7372 case Intrinsic::loongarch_lsx_vsrai_h:
7373 case Intrinsic::loongarch_lasx_xvsrai_h:
7374 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7375 lowerVectorSplatImm<4>(N, 2, DAG));
7376 case Intrinsic::loongarch_lsx_vsrai_w:
7377 case Intrinsic::loongarch_lasx_xvsrai_w:
7378 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7379 lowerVectorSplatImm<5>(N, 2, DAG));
7380 case Intrinsic::loongarch_lsx_vsrai_d:
7381 case Intrinsic::loongarch_lasx_xvsrai_d:
7382 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7383 lowerVectorSplatImm<6>(N, 2, DAG));
7384 case Intrinsic::loongarch_lsx_vclz_b:
7385 case Intrinsic::loongarch_lsx_vclz_h:
7386 case Intrinsic::loongarch_lsx_vclz_w:
7387 case Intrinsic::loongarch_lsx_vclz_d:
7388 case Intrinsic::loongarch_lasx_xvclz_b:
7389 case Intrinsic::loongarch_lasx_xvclz_h:
7390 case Intrinsic::loongarch_lasx_xvclz_w:
7391 case Intrinsic::loongarch_lasx_xvclz_d:
7392 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7393 case Intrinsic::loongarch_lsx_vpcnt_b:
7394 case Intrinsic::loongarch_lsx_vpcnt_h:
7395 case Intrinsic::loongarch_lsx_vpcnt_w:
7396 case Intrinsic::loongarch_lsx_vpcnt_d:
7397 case Intrinsic::loongarch_lasx_xvpcnt_b:
7398 case Intrinsic::loongarch_lasx_xvpcnt_h:
7399 case Intrinsic::loongarch_lasx_xvpcnt_w:
7400 case Intrinsic::loongarch_lasx_xvpcnt_d:
7401 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7402 case Intrinsic::loongarch_lsx_vbitclr_b:
7403 case Intrinsic::loongarch_lsx_vbitclr_h:
7404 case Intrinsic::loongarch_lsx_vbitclr_w:
7405 case Intrinsic::loongarch_lsx_vbitclr_d:
7406 case Intrinsic::loongarch_lasx_xvbitclr_b:
7407 case Intrinsic::loongarch_lasx_xvbitclr_h:
7408 case Intrinsic::loongarch_lasx_xvbitclr_w:
7409 case Intrinsic::loongarch_lasx_xvbitclr_d:
7410 return lowerVectorBitClear(N, DAG);
7411 case Intrinsic::loongarch_lsx_vbitclri_b:
7412 case Intrinsic::loongarch_lasx_xvbitclri_b:
7413 return lowerVectorBitClearImm<3>(N, DAG);
7414 case Intrinsic::loongarch_lsx_vbitclri_h:
7415 case Intrinsic::loongarch_lasx_xvbitclri_h:
7416 return lowerVectorBitClearImm<4>(N, DAG);
7417 case Intrinsic::loongarch_lsx_vbitclri_w:
7418 case Intrinsic::loongarch_lasx_xvbitclri_w:
7419 return lowerVectorBitClearImm<5>(N, DAG);
7420 case Intrinsic::loongarch_lsx_vbitclri_d:
7421 case Intrinsic::loongarch_lasx_xvbitclri_d:
7422 return lowerVectorBitClearImm<6>(N, DAG);
7423 case Intrinsic::loongarch_lsx_vbitset_b:
7424 case Intrinsic::loongarch_lsx_vbitset_h:
7425 case Intrinsic::loongarch_lsx_vbitset_w:
7426 case Intrinsic::loongarch_lsx_vbitset_d:
7427 case Intrinsic::loongarch_lasx_xvbitset_b:
7428 case Intrinsic::loongarch_lasx_xvbitset_h:
7429 case Intrinsic::loongarch_lasx_xvbitset_w:
7430 case Intrinsic::loongarch_lasx_xvbitset_d: {
7431 EVT VecTy = N->getValueType(0);
7432 SDValue One = DAG.getConstant(1, DL, VecTy);
7433 return DAG.getNode(
7434 ISD::OR, DL, VecTy, N->getOperand(1),
7435 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7436 }
7437 case Intrinsic::loongarch_lsx_vbitseti_b:
7438 case Intrinsic::loongarch_lasx_xvbitseti_b:
7439 return lowerVectorBitSetImm<3>(N, DAG);
7440 case Intrinsic::loongarch_lsx_vbitseti_h:
7441 case Intrinsic::loongarch_lasx_xvbitseti_h:
7442 return lowerVectorBitSetImm<4>(N, DAG);
7443 case Intrinsic::loongarch_lsx_vbitseti_w:
7444 case Intrinsic::loongarch_lasx_xvbitseti_w:
7445 return lowerVectorBitSetImm<5>(N, DAG);
7446 case Intrinsic::loongarch_lsx_vbitseti_d:
7447 case Intrinsic::loongarch_lasx_xvbitseti_d:
7448 return lowerVectorBitSetImm<6>(N, DAG);
7449 case Intrinsic::loongarch_lsx_vbitrev_b:
7450 case Intrinsic::loongarch_lsx_vbitrev_h:
7451 case Intrinsic::loongarch_lsx_vbitrev_w:
7452 case Intrinsic::loongarch_lsx_vbitrev_d:
7453 case Intrinsic::loongarch_lasx_xvbitrev_b:
7454 case Intrinsic::loongarch_lasx_xvbitrev_h:
7455 case Intrinsic::loongarch_lasx_xvbitrev_w:
7456 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7457 EVT VecTy = N->getValueType(0);
7458 SDValue One = DAG.getConstant(1, DL, VecTy);
7459 return DAG.getNode(
7460 ISD::XOR, DL, VecTy, N->getOperand(1),
7461 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7462 }
7463 case Intrinsic::loongarch_lsx_vbitrevi_b:
7464 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7465 return lowerVectorBitRevImm<3>(N, DAG);
7466 case Intrinsic::loongarch_lsx_vbitrevi_h:
7467 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7468 return lowerVectorBitRevImm<4>(N, DAG);
7469 case Intrinsic::loongarch_lsx_vbitrevi_w:
7470 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7471 return lowerVectorBitRevImm<5>(N, DAG);
7472 case Intrinsic::loongarch_lsx_vbitrevi_d:
7473 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7474 return lowerVectorBitRevImm<6>(N, DAG);
7475 case Intrinsic::loongarch_lsx_vfadd_s:
7476 case Intrinsic::loongarch_lsx_vfadd_d:
7477 case Intrinsic::loongarch_lasx_xvfadd_s:
7478 case Intrinsic::loongarch_lasx_xvfadd_d:
7479 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7480 N->getOperand(2));
7481 case Intrinsic::loongarch_lsx_vfsub_s:
7482 case Intrinsic::loongarch_lsx_vfsub_d:
7483 case Intrinsic::loongarch_lasx_xvfsub_s:
7484 case Intrinsic::loongarch_lasx_xvfsub_d:
7485 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7486 N->getOperand(2));
7487 case Intrinsic::loongarch_lsx_vfmul_s:
7488 case Intrinsic::loongarch_lsx_vfmul_d:
7489 case Intrinsic::loongarch_lasx_xvfmul_s:
7490 case Intrinsic::loongarch_lasx_xvfmul_d:
7491 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7492 N->getOperand(2));
7493 case Intrinsic::loongarch_lsx_vfdiv_s:
7494 case Intrinsic::loongarch_lsx_vfdiv_d:
7495 case Intrinsic::loongarch_lasx_xvfdiv_s:
7496 case Intrinsic::loongarch_lasx_xvfdiv_d:
7497 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7498 N->getOperand(2));
7499 case Intrinsic::loongarch_lsx_vfmadd_s:
7500 case Intrinsic::loongarch_lsx_vfmadd_d:
7501 case Intrinsic::loongarch_lasx_xvfmadd_s:
7502 case Intrinsic::loongarch_lasx_xvfmadd_d:
7503 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7504 N->getOperand(2), N->getOperand(3));
7505 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7506 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7507 N->getOperand(1), N->getOperand(2),
7508 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7509 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7510 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7511 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7512 N->getOperand(1), N->getOperand(2),
7513 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7514 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7515 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7516 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7517 N->getOperand(1), N->getOperand(2),
7518 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7519 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7520 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7521 N->getOperand(1), N->getOperand(2),
7522 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7523 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7524 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7525 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7526 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7527 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7528 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7529 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7530 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7531 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7532 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7533 N->getOperand(1)));
7534 case Intrinsic::loongarch_lsx_vreplve_b:
7535 case Intrinsic::loongarch_lsx_vreplve_h:
7536 case Intrinsic::loongarch_lsx_vreplve_w:
7537 case Intrinsic::loongarch_lsx_vreplve_d:
7538 case Intrinsic::loongarch_lasx_xvreplve_b:
7539 case Intrinsic::loongarch_lasx_xvreplve_h:
7540 case Intrinsic::loongarch_lasx_xvreplve_w:
7541 case Intrinsic::loongarch_lasx_xvreplve_d:
7542 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7543 N->getOperand(1),
7544 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7545 N->getOperand(2)));
7546 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7547 if (!Subtarget.is64Bit())
7548 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7549 break;
7550 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7551 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7552 if (!Subtarget.is64Bit())
7553 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7554 break;
7555 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7556 if (!Subtarget.is64Bit())
7557 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7558 break;
7559 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7560 if (!Subtarget.is64Bit())
7561 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7562 break;
7563 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7564 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7565 if (!Subtarget.is64Bit())
7566 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7567 break;
7568 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7569 if (!Subtarget.is64Bit())
7570 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7571 break;
7572 case Intrinsic::loongarch_lsx_bz_b:
7573 case Intrinsic::loongarch_lsx_bz_h:
7574 case Intrinsic::loongarch_lsx_bz_w:
7575 case Intrinsic::loongarch_lsx_bz_d:
7576 case Intrinsic::loongarch_lasx_xbz_b:
7577 case Intrinsic::loongarch_lasx_xbz_h:
7578 case Intrinsic::loongarch_lasx_xbz_w:
7579 case Intrinsic::loongarch_lasx_xbz_d:
7580 if (!Subtarget.is64Bit())
7581 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7582 N->getOperand(1));
7583 break;
7584 case Intrinsic::loongarch_lsx_bz_v:
7585 case Intrinsic::loongarch_lasx_xbz_v:
7586 if (!Subtarget.is64Bit())
7587 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7588 N->getOperand(1));
7589 break;
7590 case Intrinsic::loongarch_lsx_bnz_b:
7591 case Intrinsic::loongarch_lsx_bnz_h:
7592 case Intrinsic::loongarch_lsx_bnz_w:
7593 case Intrinsic::loongarch_lsx_bnz_d:
7594 case Intrinsic::loongarch_lasx_xbnz_b:
7595 case Intrinsic::loongarch_lasx_xbnz_h:
7596 case Intrinsic::loongarch_lasx_xbnz_w:
7597 case Intrinsic::loongarch_lasx_xbnz_d:
7598 if (!Subtarget.is64Bit())
7599 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7600 N->getOperand(1));
7601 break;
7602 case Intrinsic::loongarch_lsx_bnz_v:
7603 case Intrinsic::loongarch_lasx_xbnz_v:
7604 if (!Subtarget.is64Bit())
7605 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7606 N->getOperand(1));
7607 break;
7608 case Intrinsic::loongarch_lasx_concat_128_s:
7609 case Intrinsic::loongarch_lasx_concat_128_d:
7610 case Intrinsic::loongarch_lasx_concat_128:
7611 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7612 N->getOperand(1), N->getOperand(2));
7613 }
7614 return SDValue();
7615}
7616
7619 const LoongArchSubtarget &Subtarget) {
// Folds a MOVGR2FR_W_LA64 whose operand is a MOVFR2GR_S_LA64 into that inner
// node's own operand. Returns an empty SDValue when the operand is produced
// by any other opcode.
7620 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7621 // conversion is unnecessary and can be replaced with the
7622 // MOVFR2GR_S_LA64 operand.
7623 SDValue Op0 = N->getOperand(0);
7624 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7625 return Op0.getOperand(0);
7626 return SDValue();
7627}
7628
7631 const LoongArchSubtarget &Subtarget) {
// Mirror image of the MOVGR2FR_W_LA64 fold: a MOVFR2GR_S_LA64 fed by a
// MOVGR2FR_W_LA64 is replaced by the inner node's operand. Returns an empty
// SDValue when no fold applies.
7632 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7633 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7634 // operand.
7635 SDValue Op0 = N->getOperand(0);
7636 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
// The forwarded value replaces N directly, so its type must equal N's
// result type.
7637 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7638 "Unexpected value type!");
7639 return Op0.getOperand(0);
7640 }
7641 return SDValue();
7642}
7643
7646 const LoongArchSubtarget &Subtarget) {
// Runs SimplifyDemandedBits on N's own result. Returns SDValue(N, 0) when
// that call reports a simplification, and an empty SDValue otherwise.
7647 MVT VT = N->getSimpleValueType(0);
7648 unsigned NumBits = VT.getScalarSizeInBits();
7649
7650 // Simplify the inputs.
7651 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Every bit of the result's scalar width is marked as demanded.
7652 APInt DemandedMask(APInt::getAllOnes(NumBits));
7653 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7654 return SDValue(N, 0);
7655
7656 return SDValue();
7657}
7658
// DAG combine for SplitPairF64. Three operand shapes are folded, each by
// replacing both results of N through DCI.CombineTo:
//   - a BuildPairF64 operand: its two operands are forwarded;
//   - an undef operand: both halves become undef i32;
//   - a constant operand: both halves become i32 constants.
// Returns an empty SDValue when none of these applies.
7659static SDValue
7662 const LoongArchSubtarget &Subtarget) {
7663 SDValue Op0 = N->getOperand(0);
7664 SDLoc DL(N);
7665
7666 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7667 // redundant. Instead, use BuildPairF64's operands directly.
7668 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7669 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7670
7671 if (Op0->isUndef()) {
7672 SDValue Lo = DAG.getUNDEF(MVT::i32);
7673 SDValue Hi = DAG.getUNDEF(MVT::i32);
7674 return DCI.CombineTo(N, Lo, Hi);
7675 }
7676
7677 // It's cheaper to materialise two 32-bit integers than to load a double
7678 // from the constant pool and transfer it to integer registers through the
7679 // stack.
// V is the raw bit pattern of the FP constant; Lo takes bits [31:0] and Hi
// takes bits [63:32].
7681 APInt V = C->getValueAPF().bitcastToAPInt();
7682 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7683 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7684 return DCI.CombineTo(N, Lo, Hi);
7685 }
7686
7687 return SDValue();
7688}
7689
7690/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
/// As the folds below show, VANDN(a, b) is treated as AND(NOT(a), b).
/// Returns the replacement value, or an empty SDValue when nothing folds.
7693 const LoongArchSubtarget &Subtarget) {
7694 SDValue N0 = N->getOperand(0);
7695 SDValue N1 = N->getOperand(1);
7696 MVT VT = N->getSimpleValueType(0);
7697 SDLoc DL(N);
7698
7699 // VANDN(undef, x) -> 0
7700 // VANDN(x, undef) -> 0
7701 if (N0.isUndef() || N1.isUndef())
7702 return DAG.getConstant(0, DL, VT);
7703
7704 // VANDN(0, x) -> x
7706 return N1;
7707
7708 // VANDN(x, 0) -> 0
7710 return DAG.getConstant(0, DL, VT);
7711
7712 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7714 return DAG.getNOT(DL, N0, VT);
7715
7716 // Turn VANDN back to AND if input is inverted.
7717 if (SDValue Not = isNOT(N0, DAG))
7718 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7719
7720 // Folds for better commutativity:
7721 if (N1->hasOneUse()) {
7722 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7723 if (SDValue Not = isNOT(N1, DAG))
7724 return DAG.getNOT(
7725 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7726
7727 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7728 // -> NOT(OR(x, SplatVector(~Imm)))
7729 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7730 // gain benefits.
7731 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7732 N1.getOpcode() == ISD::BUILD_VECTOR) {
7733 if (SDValue SplatValue =
7734 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
// Give up if the splat scalar is also used by a node other than N1.
7735 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7736 return SDValue();
7737
7738 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// NCVal is the low 8 bits of the bitwise complement of the splat constant.
7739 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7740 SDValue Not =
7741 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7742 return DAG.getNOT(
7743 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7744 VT);
7745 }
7746 }
7747 }
7748 }
7749
7750 return SDValue();
7751}
7752
7755 const LoongArchSubtarget &Subtarget) {
// Replaces a signed int-to-FP conversion of a loaded integer with an FP load
// of the same memory followed by LoongArchISD::SITOF. Returns an empty
// SDValue when any precondition below fails.
7756 SDLoc DL(N);
7757 EVT VT = N->getValueType(0);
7758
// Only f32 (needs basic F support) and f64 (needs basic D support) results
// are handled.
7759 if (VT != MVT::f32 && VT != MVT::f64)
7760 return SDValue();
7761 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7762 return SDValue();
7763 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7764 return SDValue();
7765
7766 // Only optimize when the source and destination types have the same width.
7767 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7768 return SDValue();
7769
7770 SDValue Src = N->getOperand(0);
7771 // If the result of an integer load is only used by an integer-to-float
7772 // conversion, use a fp load instead. This eliminates an integer-to-float-move
7773 // (movgr2fr) instruction.
7774 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7775 // Do not change the width of a volatile load. This condition check is
7776 // inspired by AArch64.
7777 !cast<LoadSDNode>(Src)->isVolatile()) {
7778 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
// The replacement load reuses the original chain, base pointer, pointer
// info, alignment and memory-operand flags; only the loaded type changes.
7779 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7780 LN0->getPointerInfo(), LN0->getAlign(),
7781 LN0->getMemOperand()->getFlags());
7782
7783 // Make sure successors of the original load stay after it by updating them
7784 // to use the new Chain.
7785 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7786 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7787 }
7788
7789 return SDValue();
7790}
7791
7792// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7793// logical operations, like in the example below.
7794// or (and (truncate x, truncate y)),
7795// (xor (truncate z, build_vector (constants)))
7796// Given a target type \p VT, we generate
7797// or (and x, y), (xor z, zext(build_vector (constants)))
7798// given x, y and z are of type \p VT. We can do so, if operands are either
7799// truncates from VT types, the second operand is a vector of constants, can
7800// be recursively promoted or is an existing extension we can extend further.
//
// \p Depth is the current recursion depth; each recursive call passes
// Depth + 1. Returns the widened node, or an empty SDValue when \p N cannot
// be widened.
7802 SelectionDAG &DAG,
7803 const LoongArchSubtarget &Subtarget,
7804 unsigned Depth) {
7805 // Limit recursion to avoid excessive compile times.
7807 return SDValue();
7808
// Only bitwise logic nodes are candidates.
7809 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7810 return SDValue();
7811
7812 SDValue N0 = N.getOperand(0);
7813 SDValue N1 = N.getOperand(1);
7814
// The same opcode must be legal (or promotable) at the wide type.
7815 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7816 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7817 return SDValue();
7818
// Left operand: try to widen it recursively first, otherwise strip a
// truncate whose source already has type VT.
7819 if (SDValue NN0 =
7820 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7821 N0 = NN0;
7822 else {
7823 // The left side has to be a 'trunc'.
7824 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7825 N0.getOperand(0).getValueType() == VT;
7826 if (LHSTrunc)
7827 N0 = N0.getOperand(0);
7828 else
7829 return SDValue();
7830 }
7831
7832 if (SDValue NN1 =
7833 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7834 N1 = NN1;
7835 else {
7836 // The right side has to be a 'trunc', a (foldable) constant or an
7837 // existing extension we can extend further.
7838 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7839 N1.getOperand(0).getValueType() == VT;
7840 if (RHSTrunc)
7841 N1 = N1.getOperand(0);
// A single-use *_EXTEND_VECTOR_INREG is re-emitted with result type VT; this
// path requires LASX and a 256-bit VT.
7842 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7843 Subtarget.hasExtLASX() && N1.hasOneUse())
7844 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7845 // On 32-bit platform, i64 is an illegal integer scalar type, and
7846 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7847 // future.
7848 else if (SDValue Cst =
7850 N1 = Cst;
7851 else
7852 return SDValue();
7853 }
7854
// Rebuild the logic operation on the widened operands.
7855 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7856}
7857
7858// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7859// is LSX-sized register. In most cases we actually compare or select LASX-sized
7860// registers and mixing the two types creates horrible code. This method
7861// optimizes some of the transition sequences.
//
// \p N must be a vector ANY_EXTEND, ZERO_EXTEND or SIGN_EXTEND (asserted).
// Returns the widened replacement, or an empty SDValue when LASX is
// unavailable, the result is not a 256-bit vector, or the operand cannot be
// widened.
7863 SelectionDAG &DAG,
7864 const LoongArchSubtarget &Subtarget) {
7865 EVT VT = N.getValueType();
7866 assert(VT.isVector() && "Expected vector type");
7867 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7868 N.getOpcode() == ISD::ZERO_EXTEND ||
7869 N.getOpcode() == ISD::SIGN_EXTEND) &&
7870 "Invalid Node");
7871
7872 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7873 return SDValue();
7874
7875 SDValue Narrow = N.getOperand(0);
7876 EVT NarrowVT = Narrow.getValueType();
7877
7878 // Generate the wide operation.
7879 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7880 if (!Op)
7881 return SDValue();
// Re-apply the extension kind of N to the wide value, in-register.
7882 switch (N.getOpcode()) {
7883 default:
7884 llvm_unreachable("Unexpected opcode");
7885 case ISD::ANY_EXTEND:
7886 return Op;
7887 case ISD::ZERO_EXTEND:
7888 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7889 case ISD::SIGN_EXTEND:
7890 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7891 DAG.getValueType(NarrowVT));
7892 }
7893}
7894
7897 const LoongArchSubtarget &Subtarget) {
// Combine for extend nodes: only vector results are considered, and the
// work is delegated to PromoteMaskArithmetic. Returns an empty SDValue when
// the result is scalar or the helper finds nothing to do.
7898 EVT VT = N->getValueType(0);
7899 SDLoc DL(N);
7900
7901 if (VT.isVector())
7902 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7903 return R;
7904
7905 return SDValue();
7906}
7907
// Combine for vector concatenation: a vector-typed node with exactly two
// operands is handed to combineFP_ROUND. Returns an empty SDValue for any
// other shape or when the helper finds nothing to do.
7908static SDValue
7911 const LoongArchSubtarget &Subtarget) {
7912 SDLoc DL(N);
7913 EVT VT = N->getValueType(0);
7914
7915 if (VT.isVector() && N->getNumOperands() == 2)
7916 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
7917 return R;
7918
7919 return SDValue();
7920}
7921
7924 const LoongArchSubtarget &Subtarget) {
// Recognises a vector select that keeps X in lanes where the shift amount is
// zero and otherwise computes a rounded right shift of X, and replaces the
// whole pattern with one LoongArchISD::VSRLR / VSRAR node. Returns an empty
// SDValue as soon as any part of the pattern fails to match.
// The combine is skipped until operations have been legalized.
7925 if (DCI.isBeforeLegalizeOps())
7926 return SDValue();
7927
7928 EVT VT = N->getValueType(0);
7929 if (!VT.isVector())
7930 return SDValue();
7931
7932 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7933 return SDValue();
7934
7935 EVT EltVT = VT.getVectorElementType();
7936 if (!EltVT.isInteger())
7937 return SDValue();
7938
7939 SDValue Cond = N->getOperand(0);
7940 SDValue TrueVal = N->getOperand(1);
7941 SDValue FalseVal = N->getOperand(2);
7942
7943 // match:
7944 //
7945 // vselect (setcc shift, 0, seteq),
7946 // x,
7947 // rounded_shift
7948
7949 if (Cond.getOpcode() != ISD::SETCC)
7950 return SDValue();
7951
7952 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
7953 return SDValue();
7954
7955 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
7956 if (CC->get() != ISD::SETEQ)
7957 return SDValue();
7958
7959 SDValue Shift = Cond.getOperand(0);
7960
7961 // True branch must be original value:
7962 //
7963 // vselect cond, x, ...
7964
7965 SDValue X = TrueVal;
7966
7967 // Now match rounded shift pattern:
7968 //
7969 // add
7970 // (and
7971 // (srl X, shift-1)
7972 // 1)
7973 // (srl/sra X, shift)
7974
7975 if (FalseVal.getOpcode() != ISD::ADD)
7976 return SDValue();
7977
7978 SDValue Add0 = FalseVal.getOperand(0);
7979 SDValue Add1 = FalseVal.getOperand(1);
7980 SDValue And;
7981 SDValue Shr;
7982
// The AND may be either operand of the ADD; the other operand is then the
// candidate shift.
7983 if (Add0.getOpcode() == ISD::AND) {
7984 And = Add0;
7985 Shr = Add1;
7986 } else if (Add1.getOpcode() == ISD::AND) {
7987 And = Add1;
7988 Shr = Add0;
7989 } else {
7990 return SDValue();
7991 }
7992
7993 // match:
7994 //
7995 // srl/sra X, shift
7996
7997 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
7998 return SDValue();
7999
8000 if (Shr.getOperand(0) != X)
8001 return SDValue();
8002
8003 if (Shr.getOperand(1) != Shift)
8004 return SDValue();
8005
8006 // match:
8007 //
8008 // and
8009 // (srl X, shift-1)
8010 // 1
8011
8012 SDValue Srl = And.getOperand(0);
8013 SDValue One = And.getOperand(1);
8014 APInt SplatVal;
8015
8016 if (Srl.getOpcode() != ISD::SRL)
8017 return SDValue();
8018
// The mask must be a constant splat equal to 1, possibly behind bitcasts.
8019 One = peekThroughBitcasts(One);
8020 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
8021 return SDValue();
8022
8023 if (SplatVal != 1)
8024 return SDValue();
8025
8026 if (Srl.getOperand(0) != X)
8027 return SDValue();
8028
8029 // match:
8030 //
8031 // shift-1
8032
8033 SDValue ShiftMinus1 = Srl.getOperand(1);
8034
8035 if (ShiftMinus1.getOpcode() != ISD::ADD)
8036 return SDValue();
8037
8038 if (ShiftMinus1.getOperand(0) != Shift)
8039 return SDValue();
8040
8042 return SDValue();
8043
8044 // We matched a rounded right shift pattern and can lower it
8045 // to a single vector rounded shift instruction.
8046
// A logical shift (SRL) selects VSRLR; an arithmetic shift (SRA) selects
// VSRAR.
8047 SDLoc DL(N);
8048 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8049 : LoongArchISD::VSRAR,
8050 DL, VT, X, Shift);
8051}
8052
8054 DAGCombinerInfo &DCI) const {
// Dispatches on N's opcode to the matching perform*Combine helper. Opcodes
// without a handler yield an empty SDValue.
8055 SelectionDAG &DAG = DCI.DAG;
8056 switch (N->getOpcode()) {
8057 default:
8058 break;
8059 case ISD::ADD:
8060 return performADDCombine(N, DAG, DCI, Subtarget);
8061 case ISD::AND:
8062 return performANDCombine(N, DAG, DCI, Subtarget);
8063 case ISD::OR:
8064 return performORCombine(N, DAG, DCI, Subtarget);
8065 case ISD::SETCC:
8066 return performSETCCCombine(N, DAG, DCI, Subtarget);
8067 case ISD::SRL:
8068 return performSRLCombine(N, DAG, DCI, Subtarget);
8069 case ISD::BITCAST:
8070 return performBITCASTCombine(N, DAG, DCI, Subtarget);
// All three extension kinds share one combine.
8071 case ISD::ANY_EXTEND:
8072 case ISD::ZERO_EXTEND:
8073 case ISD::SIGN_EXTEND:
8074 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8075 case ISD::SINT_TO_FP:
8076 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8077 case LoongArchISD::BITREV_W:
8078 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8079 case LoongArchISD::BR_CC:
8080 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8081 case LoongArchISD::SELECT_CC:
8082 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
8084 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8085 case LoongArchISD::MOVGR2FR_W_LA64:
8086 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8087 case LoongArchISD::MOVFR2GR_S_LA64:
8088 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
8089 case LoongArchISD::VMSKLTZ:
8090 case LoongArchISD::XVMSKLTZ:
8091 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
8092 case LoongArchISD::SPLIT_PAIR_F64:
8093 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8094 case LoongArchISD::VANDN:
8095 return performVANDNCombine(N, DAG, DCI, Subtarget);
8097 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8098 case ISD::VSELECT:
8099 return performVSELECTCombine(N, DAG, DCI, Subtarget);
// When combineFP_ROUND finds nothing, control leaves the switch and reaches
// the empty-SDValue return below.
8100 case LoongArchISD::VPACKEV:
8101 case LoongArchISD::VPERMI:
8102 if (SDValue Result =
8103 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
8104 return Result;
8105 }
8106 return SDValue();
8107}
8108
// Nothing is inserted unless the -loongarch-check-zero-division option is
// set; MBB is then returned unchanged. Otherwise the block is split after MI
// and the new SinkMBB is returned.
8111 if (!ZeroDivCheck)
8112 return MBB;
8113
8114 // Build instructions:
8115 // MBB:
8116 // div(or mod) $dst, $dividend, $divisor
8117 // bne $divisor, $zero, SinkMBB
8118 // BreakMBB:
8119 // break 7 // BRK_DIVZERO
8120 // SinkMBB:
8121 // fallthrough
8122 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8123 MachineFunction::iterator It = ++MBB->getIterator();
8124 MachineFunction *MF = MBB->getParent();
8125 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8126 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Both new blocks are placed right after MBB, BreakMBB first.
8127 MF->insert(It, BreakMBB);
8128 MF->insert(It, SinkMBB);
8129
8130 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8131 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8132 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8133
8134 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8135 DebugLoc DL = MI.getDebugLoc();
// Operand 2 of MI is the divisor register.
8136 MachineOperand &Divisor = MI.getOperand(2);
8137 Register DivisorReg = Divisor.getReg();
8138
8139 // MBB:
// The BNE carries over the kill state that MI had on the divisor; MI's own
// flag is cleared further down.
8140 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8141 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8142 .addReg(LoongArch::R0)
8143 .addMBB(SinkMBB);
8144 MBB->addSuccessor(BreakMBB);
8145 MBB->addSuccessor(SinkMBB);
8146
8147 // BreakMBB:
8148 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8149 // definition of BRK_DIVZERO.
8150 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8151 BreakMBB->addSuccessor(SinkMBB);
8152
8153 // Clear Divisor's kill flag.
8154 Divisor.setIsKill(false);
8155
8156 return SinkMBB;
8157}
8158
// Expands a vector zero / non-zero test pseudo (PseudoVB*/PseudoXVB*) into
// real control flow: a vector "set condition flag" instruction, a BCNEZ on
// that flag, and two blocks that produce 0 or 1 and are merged by a PHI in a
// new sink block. The pseudo is erased and the sink block is returned.
8159static MachineBasicBlock *
8161 const LoongArchSubtarget &Subtarget) {
// Map the pseudo opcode to the instruction that sets the condition flag.
8162 unsigned CondOpc;
8163 switch (MI.getOpcode()) {
8164 default:
8165 llvm_unreachable("Unexpected opcode");
8166 case LoongArch::PseudoVBZ:
8167 CondOpc = LoongArch::VSETEQZ_V;
8168 break;
8169 case LoongArch::PseudoVBZ_B:
8170 CondOpc = LoongArch::VSETANYEQZ_B;
8171 break;
8172 case LoongArch::PseudoVBZ_H:
8173 CondOpc = LoongArch::VSETANYEQZ_H;
8174 break;
8175 case LoongArch::PseudoVBZ_W:
8176 CondOpc = LoongArch::VSETANYEQZ_W;
8177 break;
8178 case LoongArch::PseudoVBZ_D:
8179 CondOpc = LoongArch::VSETANYEQZ_D;
8180 break;
8181 case LoongArch::PseudoVBNZ:
8182 CondOpc = LoongArch::VSETNEZ_V;
8183 break;
8184 case LoongArch::PseudoVBNZ_B:
8185 CondOpc = LoongArch::VSETALLNEZ_B;
8186 break;
8187 case LoongArch::PseudoVBNZ_H:
8188 CondOpc = LoongArch::VSETALLNEZ_H;
8189 break;
8190 case LoongArch::PseudoVBNZ_W:
8191 CondOpc = LoongArch::VSETALLNEZ_W;
8192 break;
8193 case LoongArch::PseudoVBNZ_D:
8194 CondOpc = LoongArch::VSETALLNEZ_D;
8195 break;
8196 case LoongArch::PseudoXVBZ:
8197 CondOpc = LoongArch::XVSETEQZ_V;
8198 break;
8199 case LoongArch::PseudoXVBZ_B:
8200 CondOpc = LoongArch::XVSETANYEQZ_B;
8201 break;
8202 case LoongArch::PseudoXVBZ_H:
8203 CondOpc = LoongArch::XVSETANYEQZ_H;
8204 break;
8205 case LoongArch::PseudoXVBZ_W:
8206 CondOpc = LoongArch::XVSETANYEQZ_W;
8207 break;
8208 case LoongArch::PseudoXVBZ_D:
8209 CondOpc = LoongArch::XVSETANYEQZ_D;
8210 break;
8211 case LoongArch::PseudoXVBNZ:
8212 CondOpc = LoongArch::XVSETNEZ_V;
8213 break;
8214 case LoongArch::PseudoXVBNZ_B:
8215 CondOpc = LoongArch::XVSETALLNEZ_B;
8216 break;
8217 case LoongArch::PseudoXVBNZ_H:
8218 CondOpc = LoongArch::XVSETALLNEZ_H;
8219 break;
8220 case LoongArch::PseudoXVBNZ_W:
8221 CondOpc = LoongArch::XVSETALLNEZ_W;
8222 break;
8223 case LoongArch::PseudoXVBNZ_D:
8224 CondOpc = LoongArch::XVSETALLNEZ_D;
8225 break;
8226 }
8227
8228 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8229 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8230 DebugLoc DL = MI.getDebugLoc();
8233
8234 MachineFunction *F = BB->getParent();
8235 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8236 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8237 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8238
// Resulting layout order: FalseBB, TrueBB, SinkBB.
8239 F->insert(It, FalseBB);
8240 F->insert(It, TrueBB);
8241 F->insert(It, SinkBB);
8242
8243 // Transfer the remainder of MBB and its successor edges to Sink.
8244 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
8246
8247 // Insert the real instruction to BB.
// The flag is written to a fresh CFR-class virtual register from the
// pseudo's vector operand (operand 1).
8248 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8249 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8250
8251 // Insert branch.
8252 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8253 BB->addSuccessor(FalseBB);
8254 BB->addSuccessor(TrueBB);
8255
8256 // FalseBB.
// Produces the constant 0 and jumps over TrueBB to SinkBB.
8257 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8258 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8259 .addReg(LoongArch::R0)
8260 .addImm(0);
8261 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8262 FalseBB->addSuccessor(SinkBB);
8263
8264 // TrueBB.
// Produces the constant 1. No branch is emitted here; TrueBB was inserted
// directly before SinkBB.
8265 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8266 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8267 .addReg(LoongArch::R0)
8268 .addImm(1);
8269 TrueBB->addSuccessor(SinkBB);
8270
8271 // SinkBB: merge the results.
// The PHI defines the pseudo's original destination register (operand 0).
8272 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8273 MI.getOperand(0).getReg())
8274 .addReg(RD1)
8275 .addMBB(FalseBB)
8276 .addReg(RD2)
8277 .addMBB(TrueBB);
8278
8279 // The pseudo instruction is gone now.
8280 MI.eraseFromParent();
8281 return SinkBB;
8282}
8283
// Expands PseudoXVINSGR2VR_B / PseudoXVINSGR2VR_H (insert a GPR element into
// a 256-bit vector at an immediate index) in place within BB. The pseudo is
// erased and BB is returned.
8284static MachineBasicBlock *
8286 const LoongArchSubtarget &Subtarget) {
8287 unsigned InsOp;
8288 unsigned BroadcastOp;
// NOTE(review): HalfSize looks like the element count of one 128-bit half
// (16 x i8 / 8 x i16) — confirm.
8289 unsigned HalfSize;
8290 switch (MI.getOpcode()) {
8291 default:
8292 llvm_unreachable("Unexpected opcode");
8293 case LoongArch::PseudoXVINSGR2VR_B:
8294 HalfSize = 16;
8295 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8296 InsOp = LoongArch::XVEXTRINS_B;
8297 break;
8298 case LoongArch::PseudoXVINSGR2VR_H:
8299 HalfSize = 8;
8300 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8301 InsOp = LoongArch::XVEXTRINS_H;
8302 break;
8303 }
8304 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8305 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8306 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8307 DebugLoc DL = MI.getDebugLoc();
8309 // XDst = vector_insert XSrc, Elt, Idx
8310 Register XDst = MI.getOperand(0).getReg();
8311 Register XSrc = MI.getOperand(1).getReg();
8312 Register Elt = MI.getOperand(2).getReg();
8313 unsigned Idx = MI.getOperand(3).getImm();
8314
// Path 1: the source vector is an IMPLICIT_DEF and the index is below
// HalfSize. The element is inserted into the sub_128 subregister with the
// 128-bit VINSGR2VR instruction, and SUBREG_TO_REG rebuilds the 256-bit
// result.
8315 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8316 Idx < HalfSize) {
8317 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8318 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8319
8320 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8321 .addReg(XSrc, {}, LoongArch::sub_128);
8322 BuildMI(*BB, MI, DL,
8323 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8324 : LoongArch::VINSGR2VR_B),
8325 ScratchSubReg2)
8326 .addReg(ScratchSubReg1)
8327 .addReg(Elt)
8328 .addImm(Idx);
8329
8330 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8331 .addReg(ScratchSubReg2)
8332 .addImm(LoongArch::sub_128);
8333 } else {
// Path 2 (general case): broadcast the element, combine the broadcast with
// XSrc through XVPERMI_Q (immediate chosen by which side of HalfSize the
// index falls on), then insert into XSrc with XVEXTRINS.
8334 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8335 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8336
8337 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8338
8339 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8340 .addReg(ScratchReg1)
8341 .addReg(XSrc)
8342 .addImm(Idx >= HalfSize ? 48 : 18);
8343
// The index is reduced to its offset within a half; multiplying by 17
// (0x11) writes that offset into both 4-bit fields of the immediate.
8344 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8345 .addReg(XSrc)
8346 .addReg(ScratchReg2)
8347 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8348 }
8349
8350 MI.eraseFromParent();
8351 return BB;
8352}
8353
8356 const LoongArchSubtarget &Subtarget) {
// Expands a scalar PseudoCTPOP_* into three LSX instructions: broadcast the
// GPR into a 128-bit vector, run the vector population count, then move
// element 0 back to the destination GPR. Requires LSX (asserted). The pseudo
// is erased and BB is returned.
8357 assert(Subtarget.hasExtLSX());
8358 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8359 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8360 DebugLoc DL = MI.getDebugLoc();
8362 Register Dst = MI.getOperand(0).getReg();
8363 Register Src = MI.getOperand(1).getReg();
8364
// Pick the broadcast / popcount / extract opcodes for the element width
// named by the pseudo.
8365 unsigned BroadcastOp, CTOp, PickOp;
8366 switch (MI.getOpcode()) {
8367 default:
8368 llvm_unreachable("Unexpected opcode");
8369 case LoongArch::PseudoCTPOP_B:
8370 BroadcastOp = LoongArch::VREPLGR2VR_B;
8371 CTOp = LoongArch::VPCNT_B;
8372 PickOp = LoongArch::VPICKVE2GR_B;
8373 break;
8374 case LoongArch::PseudoCTPOP_H:
8375 case LoongArch::PseudoCTPOP_H_LA32:
8376 BroadcastOp = LoongArch::VREPLGR2VR_H;
8377 CTOp = LoongArch::VPCNT_H;
8378 PickOp = LoongArch::VPICKVE2GR_H;
8379 break;
8380 case LoongArch::PseudoCTPOP_W:
8381 case LoongArch::PseudoCTPOP_W_LA32:
8382 BroadcastOp = LoongArch::VREPLGR2VR_W;
8383 CTOp = LoongArch::VPCNT_W;
8384 PickOp = LoongArch::VPICKVE2GR_W;
8385 break;
8386 case LoongArch::PseudoCTPOP_D:
8387 BroadcastOp = LoongArch::VREPLGR2VR_D;
8388 CTOp = LoongArch::VPCNT_D;
8389 PickOp = LoongArch::VPICKVE2GR_D;
8390 break;
8391 }
8392
8393 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8394 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8395 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8396 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8397 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8398
8399 MI.eraseFromParent();
8400 return BB;
8401}
8402
// Expands the PseudoVMSK*/PseudoXVMSK* pseudos (operand 0 = scalar result,
// operand 1 = vector source). The matching [X]VMSK* instruction produces a
// mask vector; the EQZ forms additionally invert it with [X]VNOR_V. The mask
// is then moved into the scalar result: a single VPICKVE2GR_HU for 128-bit
// sources, or two XVPICKVE2GR_WU reads merged by BSTRINS for 256-bit sources.
// The pseudo is erased and the same block is returned.
8403static MachineBasicBlock *
8405 const LoongArchSubtarget &Subtarget) {
8406 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8407 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8408 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8410 Register Dst = MI.getOperand(0).getReg();
8411 Register Src = MI.getOperand(1).getReg();
8412 DebugLoc DL = MI.getDebugLoc();
// Defaults describe the 128-bit byte-element case; the switch overrides
// EleBits for wider elements and RC for the 256-bit (XV*) pseudos.
// NotOpc == 0 means "no inversion step".
8413 unsigned EleBits = 8;
8414 unsigned NotOpc = 0;
8415 unsigned MskOpc;
8416
8417 switch (MI.getOpcode()) {
8418 default:
8419 llvm_unreachable("Unexpected opcode");
8420 case LoongArch::PseudoVMSKLTZ_B:
8421 MskOpc = LoongArch::VMSKLTZ_B;
8422 break;
8423 case LoongArch::PseudoVMSKLTZ_H:
8424 MskOpc = LoongArch::VMSKLTZ_H;
8425 EleBits = 16;
8426 break;
8427 case LoongArch::PseudoVMSKLTZ_W:
8428 MskOpc = LoongArch::VMSKLTZ_W;
8429 EleBits = 32;
8430 break;
8431 case LoongArch::PseudoVMSKLTZ_D:
8432 MskOpc = LoongArch::VMSKLTZ_D;
8433 EleBits = 64;
8434 break;
8435 case LoongArch::PseudoVMSKGEZ_B:
8436 MskOpc = LoongArch::VMSKGEZ_B;
8437 break;
// "Equal to zero" has no direct opcode here: it is built as the NOR of the
// "not zero" mask with itself.
8438 case LoongArch::PseudoVMSKEQZ_B:
8439 MskOpc = LoongArch::VMSKNZ_B;
8440 NotOpc = LoongArch::VNOR_V;
8441 break;
8442 case LoongArch::PseudoVMSKNEZ_B:
8443 MskOpc = LoongArch::VMSKNZ_B;
8444 break;
8445 case LoongArch::PseudoXVMSKLTZ_B:
8446 MskOpc = LoongArch::XVMSKLTZ_B;
8447 RC = &LoongArch::LASX256RegClass;
8448 break;
8449 case LoongArch::PseudoXVMSKLTZ_H:
8450 MskOpc = LoongArch::XVMSKLTZ_H;
8451 RC = &LoongArch::LASX256RegClass;
8452 EleBits = 16;
8453 break;
8454 case LoongArch::PseudoXVMSKLTZ_W:
8455 MskOpc = LoongArch::XVMSKLTZ_W;
8456 RC = &LoongArch::LASX256RegClass;
8457 EleBits = 32;
8458 break;
8459 case LoongArch::PseudoXVMSKLTZ_D:
8460 MskOpc = LoongArch::XVMSKLTZ_D;
8461 RC = &LoongArch::LASX256RegClass;
8462 EleBits = 64;
8463 break;
8464 case LoongArch::PseudoXVMSKGEZ_B:
8465 MskOpc = LoongArch::XVMSKGEZ_B;
8466 RC = &LoongArch::LASX256RegClass;
8467 break;
8468 case LoongArch::PseudoXVMSKEQZ_B:
8469 MskOpc = LoongArch::XVMSKNZ_B;
8470 NotOpc = LoongArch::XVNOR_V;
8471 RC = &LoongArch::LASX256RegClass;
8472 break;
8473 case LoongArch::PseudoXVMSKNEZ_B:
8474 MskOpc = LoongArch::XVMSKNZ_B;
8475 RC = &LoongArch::LASX256RegClass;
8476 break;
8477 }
8478
8479 Register Msk = MRI.createVirtualRegister(RC);
8480 if (NotOpc) {
// Compute the mask into Tmp, then NOR Tmp with itself (a bitwise NOT) into
// Msk. Tmp has no later use, hence the kill flags.
8481 Register Tmp = MRI.createVirtualRegister(RC);
8482 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8483 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8484 .addReg(Tmp, RegState::Kill)
8485 .addReg(Tmp, RegState::Kill);
8486 } else {
8487 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8488 }
8489
8490 if (TRI->getRegSizeInBits(*RC) > 128) {
// 256-bit register class: read word element 0 and word element 4 of the mask
// (4 * 32 bits = the start of the upper 128-bit half) into two GPRs.
8491 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8492 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8493 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8494 .addReg(Msk)
8495 .addImm(0);
8496 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8497 .addReg(Msk, RegState::Kill)
8498 .addImm(4);
// Merge the two halves with a bit-string insert sized by the element count:
// the immediates are 256/EleBits - 1 and 128/EleBits — presumably the
// (msb, lsb) of the field that receives the upper half's bits; TODO confirm
// against the BSTRINS operand order.
8499 BuildMI(*BB, MI, DL,
8500 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8501 : LoongArch::BSTRINS_W),
8502 Dst)
8505 .addImm(256 / EleBits - 1)
8506 .addImm(128 / EleBits);
8507 } else {
// 128-bit register class: the mask is read from halfword element 0 with the
// unsigned (HU) pick.
8508 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8509 .addReg(Msk, RegState::Kill)
8510 .addImm(0);
8511 }
8512
8513 MI.eraseFromParent();
8514 return BB;
8515}
8516
// Expands SplitPairF64Pseudo (operand 0 = low result, operand 1 = high result,
// operand 2 = source register) into two moves out of the source:
// MOVFR2GR_S_64 defines the low result and MOVFRH2GR_S defines the high
// result. The pseudo is erased and the same block is returned.
8517static MachineBasicBlock *
8519 const LoongArchSubtarget &Subtarget) {
8520 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8521 "Unexpected instruction");
8522
8523 MachineFunction &MF = *BB->getParent();
8524 DebugLoc DL = MI.getDebugLoc();
8526 Register LoReg = MI.getOperand(0).getReg();
8527 Register HiReg = MI.getOperand(1).getReg();
8528 Register SrcReg = MI.getOperand(2).getReg();
8529
// The source is read twice; only the second (last) read may carry the
// pseudo's kill flag.
8530 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8531 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8532 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8533 MI.eraseFromParent(); // The pseudo instruction is gone now.
8534 return BB;
8535}
8536
// Expands BuildPairF64Pseudo (operand 0 = result, operand 1 = low input,
// operand 2 = high input). MOVGR2FR_W_64 first moves the low input into a
// temporary FPR64 virtual register; MOVGR2FRH_W then takes that temporary
// plus the high input and defines the result. The pseudo is erased and the
// same block is returned.
8537static MachineBasicBlock *
8539 const LoongArchSubtarget &Subtarget) {
8540 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8541 "Unexpected instruction");
8542
8543 MachineFunction &MF = *BB->getParent();
8544 DebugLoc DL = MI.getDebugLoc();
8547 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8548 Register DstReg = MI.getOperand(0).getReg();
8549 Register LoReg = MI.getOperand(1).getReg();
8550 Register HiReg = MI.getOperand(2).getReg();
8551
// Kill flags on the two inputs are copied from the pseudo; the temporary is
// always killed because the second instruction is its only reader.
8552 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8553 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8554 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8555 .addReg(TmpReg, RegState::Kill)
8556 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8557 MI.eraseFromParent(); // The pseudo instruction is gone now.
8558 return BB;
8559}
8560
// Opcode predicate: true only for Select_GPR_Using_CC_GPR, false for every
// other opcode. New select pseudos would need a case here to be recognised.
8562 switch (MI.getOpcode()) {
8563 default:
8564 return false;
8565 case LoongArch::Select_GPR_Using_CC_GPR:
8566 return true;
8567 }
8568}
8569
// Lowers a select pseudo (and any compatible selects that follow it) into a
// branch plus PHI nodes. Operand layout read below: 0 = result, 1 = LHS,
// 2 = RHS (register or immediate), 3 = immediate used as the branch opcode,
// 4 = value taken when the branch is taken, 5 = value taken on fallthrough.
// Returns the newly created tail block, which now holds the PHIs and
// everything that followed the select sequence.
8570static MachineBasicBlock *
8572 const LoongArchSubtarget &Subtarget) {
8573 // To "insert" Select_* instructions, we actually have to insert the triangle
8574 // control-flow pattern. The incoming instructions know the destination vreg
8575 // to set, the condition code register to branch on, the true/false values to
8576 // select between, and the condcode to use to select the appropriate branch.
8577 //
8578 // We produce the following control flow:
8579 // HeadMBB
8580 // | \
8581 // | IfFalseMBB
8582 // | /
8583 // TailMBB
8584 //
8585 // When we find a sequence of selects we attempt to optimize their emission
8586 // by sharing the control flow. Currently we only handle cases where we have
8587 // multiple selects with the exact same condition (same LHS, RHS and CC).
8588 // The selects may be interleaved with other instructions if the other
8589 // instructions meet some requirements we deem safe:
8590 // - They are not pseudo instructions.
8591 // - They are debug instructions. Otherwise,
8592 // - They do not have side-effects, do not access memory and their inputs do
8593 // not depend on the results of the select pseudo-instructions.
8594 // The TrueV/FalseV operands of the selects cannot depend on the result of
8595 // previous selects in the sequence.
8596 // These conditions could be further relaxed. See the X86 target for a
8597 // related approach and more information.
8598
// RHS stays default-constructed when operand 2 is an immediate.
8599 Register LHS = MI.getOperand(1).getReg();
8600 Register RHS;
8601 if (MI.getOperand(2).isReg())
8602 RHS = MI.getOperand(2).getReg();
8603 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
8604
// SelectDests collects the result registers of every select accepted into the
// sequence; SelectDebugValues collects their debug users for later relocation.
8605 SmallVector<MachineInstr *, 4> SelectDebugValues;
8606 SmallSet<Register, 4> SelectDests;
8607 SelectDests.insert(MI.getOperand(0).getReg());
8608
// Scan forward from MI (inclusive) to find the last select that can share the
// control flow. Because the scan starts at MI itself and rejects a
// non-register operand 2, a select with an immediate RHS stops the scan at
// once and is lowered alone.
8609 MachineInstr *LastSelectPseudo = &MI;
8610 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8611 SequenceMBBI != E; ++SequenceMBBI) {
8612 if (SequenceMBBI->isDebugInstr())
8613 continue;
8614 if (isSelectPseudo(*SequenceMBBI)) {
// Stop on a different condition, or when a value operand is itself the result
// of an earlier select in the sequence.
8615 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8616 !SequenceMBBI->getOperand(2).isReg() ||
8617 SequenceMBBI->getOperand(2).getReg() != RHS ||
8618 SequenceMBBI->getOperand(3).getImm() != CC ||
8619 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8620 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8621 break;
8622 LastSelectPseudo = &*SequenceMBBI;
8623 SequenceMBBI->collectDebugValues(SelectDebugValues);
8624 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8625 continue;
8626 }
// A non-select instruction ends the sequence if it has unmodeled side
// effects, touches memory, needs its own custom insertion, or reads a select
// result.
8627 if (SequenceMBBI->hasUnmodeledSideEffects() ||
8628 SequenceMBBI->mayLoadOrStore() ||
8629 SequenceMBBI->usesCustomInsertionHook())
8630 break;
8631 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8632 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8633 }))
8634 break;
8635 }
8636
8637 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
8638 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8639 DebugLoc DL = MI.getDebugLoc();
8641
// Create the two new blocks and insert them at position I, IfFalseMBB first.
8642 MachineBasicBlock *HeadMBB = BB;
8643 MachineFunction *F = BB->getParent();
8644 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8645 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8646
8647 F->insert(I, IfFalseMBB);
8648 F->insert(I, TailMBB);
8649
8650 // Set the call frame size on entry to the new basic blocks.
8651 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
8652 IfFalseMBB->setCallFrameSize(CallFrameSize);
8653 TailMBB->setCallFrameSize(CallFrameSize);
8654
8655 // Transfer debug instructions associated with the selects to TailMBB.
8656 for (MachineInstr *DebugInstr : SelectDebugValues) {
8657 TailMBB->push_back(DebugInstr->removeFromParent());
8658 }
8659
8660 // Move all instructions after the sequence to TailMBB.
8661 TailMBB->splice(TailMBB->end(), HeadMBB,
8662 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
8663 // Update machine-CFG edges by transferring all successors of the current
8664 // block to the new block which will contain the Phi nodes for the selects.
8665 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
8666 // Set the successors for HeadMBB.
8667 HeadMBB->addSuccessor(IfFalseMBB);
8668 HeadMBB->addSuccessor(TailMBB);
8669
// The branch is appended to HeadMBB and targets TailMBB directly, so the
// taken path supplies operand 4 to the PHIs and the fallthrough path through
// IfFalseMBB supplies operand 5.
8670 // Insert appropriate branch.
8671 if (MI.getOperand(2).isImm())
8672 BuildMI(HeadMBB, DL, TII.get(CC))
8673 .addReg(LHS)
8674 .addImm(MI.getOperand(2).getImm())
8675 .addMBB(TailMBB);
8676 else
8677 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
8678
8679 // IfFalseMBB just falls through to TailMBB.
8680 IfFalseMBB->addSuccessor(TailMBB);
8681
// Walk MI..LastSelectPseudo, which are all still in HeadMBB. `Next` is taken
// before the erase so the iterator stays valid; non-select instructions in the
// range are left in place.
8682 // Create PHIs for all of the select pseudo-instructions.
8683 auto SelectMBBI = MI.getIterator();
8684 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
8685 auto InsertionPoint = TailMBB->begin();
8686 while (SelectMBBI != SelectEnd) {
8687 auto Next = std::next(SelectMBBI);
8688 if (isSelectPseudo(*SelectMBBI)) {
8689 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
8690 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
8691 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
8692 .addReg(SelectMBBI->getOperand(4).getReg())
8693 .addMBB(HeadMBB)
8694 .addReg(SelectMBBI->getOperand(5).getReg())
8695 .addMBB(IfFalseMBB);
8696 SelectMBBI->eraseFromParent();
8697 }
8698 SelectMBBI = Next;
8699 }
8700
// PHIs now exist in this function, so the NoPHIs property is cleared.
8701 F->getProperties().resetNoPHIs();
8702 return TailMBB;
8703}
8704
// Dispatches an instruction that requested custom insertion to the routine
// that expands it, keyed on the opcode. WRFCSR and RDFCSR are expanded inline;
// every other case forwards to a helper. Returns whatever block the expansion
// reports (the incoming BB for the inline cases). Any opcode without a case is
// treated as a programming error.
8705MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
8706 MachineInstr &MI, MachineBasicBlock *BB) const {
8707 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8708 DebugLoc DL = MI.getDebugLoc();
8709
8710 switch (MI.getOpcode()) {
8711 default:
8712 llvm_unreachable("Unexpected instr type to insert");
8713 case LoongArch::DIV_W:
8714 case LoongArch::DIV_WU:
8715 case LoongArch::MOD_W:
8716 case LoongArch::MOD_WU:
8717 case LoongArch::DIV_D:
8718 case LoongArch::DIV_DU:
8719 case LoongArch::MOD_D:
8720 case LoongArch::MOD_DU:
8721 return insertDivByZeroTrap(MI, BB);
// Not reached: the return above always leaves the function.
8722 break;
// WRFCSR: operand 0 is an immediate index added to FCSR0 to name the
// destination control register; operand 1 is the GPR holding the new value.
8723 case LoongArch::WRFCSR: {
8724 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
8725 LoongArch::FCSR0 + MI.getOperand(0).getImm())
8726 .addReg(MI.getOperand(1).getReg());
8727 MI.eraseFromParent();
8728 return BB;
8729 }
// RDFCSR: operand 0 is the GPR result; operand 1 is the immediate index added
// to FCSR0 to name the source register. The register use is then marked undef.
8730 case LoongArch::RDFCSR: {
8731 MachineInstr *ReadFCSR =
8732 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
8733 MI.getOperand(0).getReg())
8734 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
8735 ReadFCSR->getOperand(1).setIsUndef();
8736 MI.eraseFromParent();
8737 return BB;
8738 }
8739 case LoongArch::Select_GPR_Using_CC_GPR:
8740 return emitSelectPseudo(MI, BB, Subtarget);
8741 case LoongArch::BuildPairF64Pseudo:
8742 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
8743 case LoongArch::SplitPairF64Pseudo:
8744 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
8745 case LoongArch::PseudoVBZ:
8746 case LoongArch::PseudoVBZ_B:
8747 case LoongArch::PseudoVBZ_H:
8748 case LoongArch::PseudoVBZ_W:
8749 case LoongArch::PseudoVBZ_D:
8750 case LoongArch::PseudoVBNZ:
8751 case LoongArch::PseudoVBNZ_B:
8752 case LoongArch::PseudoVBNZ_H:
8753 case LoongArch::PseudoVBNZ_W:
8754 case LoongArch::PseudoVBNZ_D:
8755 case LoongArch::PseudoXVBZ:
8756 case LoongArch::PseudoXVBZ_B:
8757 case LoongArch::PseudoXVBZ_H:
8758 case LoongArch::PseudoXVBZ_W:
8759 case LoongArch::PseudoXVBZ_D:
8760 case LoongArch::PseudoXVBNZ:
8761 case LoongArch::PseudoXVBNZ_B:
8762 case LoongArch::PseudoXVBNZ_H:
8763 case LoongArch::PseudoXVBNZ_W:
8764 case LoongArch::PseudoXVBNZ_D:
8765 return emitVecCondBranchPseudo(MI, BB, Subtarget);
8766 case LoongArch::PseudoXVINSGR2VR_B:
8767 case LoongArch::PseudoXVINSGR2VR_H:
8768 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
8769 case LoongArch::PseudoCTPOP_B:
8770 case LoongArch::PseudoCTPOP_H:
8771 case LoongArch::PseudoCTPOP_W:
8772 case LoongArch::PseudoCTPOP_D:
8773 case LoongArch::PseudoCTPOP_H_LA32:
8774 case LoongArch::PseudoCTPOP_W_LA32:
8775 return emitPseudoCTPOP(MI, BB, Subtarget);
8776 case LoongArch::PseudoVMSKLTZ_B:
8777 case LoongArch::PseudoVMSKLTZ_H:
8778 case LoongArch::PseudoVMSKLTZ_W:
8779 case LoongArch::PseudoVMSKLTZ_D:
8780 case LoongArch::PseudoVMSKGEZ_B:
8781 case LoongArch::PseudoVMSKEQZ_B:
8782 case LoongArch::PseudoVMSKNEZ_B:
8783 case LoongArch::PseudoXVMSKLTZ_B:
8784 case LoongArch::PseudoXVMSKLTZ_H:
8785 case LoongArch::PseudoXVMSKLTZ_W:
8786 case LoongArch::PseudoXVMSKLTZ_D:
8787 case LoongArch::PseudoXVMSKGEZ_B:
8788 case LoongArch::PseudoXVMSKEQZ_B:
8789 case LoongArch::PseudoXVMSKNEZ_B:
8790 return emitPseudoVMSKCOND(MI, BB, Subtarget);
8791 case TargetOpcode::STATEPOINT:
8792 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
8793 // while bl call instruction (where statepoint will be lowered at the
8794 // end) has implicit def. This def is early-clobber as it will be set at
8795 // the moment of the call and earlier than any use is read.
8796 // Add this implicit dead def here as a workaround.
8797 MI.addOperand(*MI.getMF(),
8799 LoongArch::R1, /*isDef*/ true,
8800 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
8801 /*isUndef*/ false, /*isEarlyClobber*/ true));
// The 64-bit check runs after the operand has been added; on a 32-bit
// subtarget compilation stops here with a fatal error.
8802 if (!Subtarget.is64Bit())
8803 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
8804 return emitPatchPoint(MI, BB);
8805 case LoongArch::PROBED_STACKALLOC_DYN:
8806 return emitDynamicProbedAlloc(MI, BB);
8807 }
8808}
8809
8811 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8812 unsigned *Fast) const {
// The answer depends only on the subtarget's UAL feature: without it the
// query returns false. VT, AddrSpace, Alignment and Flags are not consulted.
8813 if (!Subtarget.hasUAL())
8814 return false;
8815
// Fast is an optional out-parameter; it is written only when non-null.
8816 // TODO: set reasonable speed number.
8817 if (Fast)
8818 *Fast = 1;
8819 return true;
8820}
8821
8822//===----------------------------------------------------------------------===//
8823// Calling Convention Implementation
8824//===----------------------------------------------------------------------===//
8825
8826// Eight general-purpose registers a0-a7 used for passing integer arguments,
8827// with a0-a1 reused to return values. Generally, the GPRs are used to pass
8828// fixed-point arguments, and floating-point arguments when no FPR is available
8829// or with soft float ABI.
8830const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
8831 LoongArch::R7, LoongArch::R8, LoongArch::R9,
8832 LoongArch::R10, LoongArch::R11};
8833
8834// PreserveNone calling convention:
8835// Arguments may be passed in any general-purpose registers except:
8836// - R1 : return address register
8837// - R22 : frame pointer
8838// - R31 : base pointer
8839//
8840// All general-purpose registers are treated as caller-saved,
8841// except R1 (RA) and R22 (FP).
8842//
8843// Non-volatile registers are allocated first so that a function
8844// can call normal functions without having to spill and reload
8845// argument registers.
//
// Allocation order of the list below: R23-R30 first, then R4-R20.
// Besides the three registers named above, R0, R2, R3 and R21 are also
// absent from the list.
8847 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
8848 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
8849 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
8850 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
8851 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
8852 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
8853 LoongArch::R20};
8854
8855// Eight floating-point registers fa0-fa7 used for passing floating-point
8856// arguments, and fa0-fa1 are also used to return values.
8857const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
8858 LoongArch::F3, LoongArch::F4, LoongArch::F5,
8859 LoongArch::F6, LoongArch::F7};
8860// FPR32 and FPR64 alias each other.
// The list below holds the 64-bit counterparts (F0_64-F7_64) of the eight
// FPR32 argument registers, in the same order.
8862 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
8863 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
8864
8865const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
8866 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
8867 LoongArch::VR6, LoongArch::VR7};
8868
8869const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
8870 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
8871 LoongArch::XR6, LoongArch::XR7};
8872
// Allocates the next free integer argument register for the calling
// convention recorded in State. The result of AllocateReg is returned
// unchanged; callers test it for "no register left".
8874 switch (State.getCallingConv()) {
// Non-variadic calls in this branch draw from PreserveNoneArgGPRs; variadic
// ones fall through to the standard list. NOTE(review): presumably this is
// the preserve_none convention's case — confirm against the case label.
8876 if (!State.isVarArg())
8877 return State.AllocateReg(PreserveNoneArgGPRs);
8878 [[fallthrough]];
8879 default:
8880 return State.AllocateReg(ArgGPRs);
8881 }
8882}
8883
8884// Pass a 2*GRLen argument that has been split into two GRLen values through
8885// registers or the stack as necessary.
//
// VA1/ArgFlags1 describe the first half; ValNo2/ValVT2/LocVT2 describe the
// second. Three outcomes are possible: both halves in registers, first half
// in a register and second on the stack, or both on the stack. Always returns
// false. ArgFlags2 is not read by the code below.
8886static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
8887 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
8888 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
8889 ISD::ArgFlagsTy ArgFlags2) {
8890 unsigned GRLenInBytes = GRLen / 8;
8891 if (Register Reg = allocateArgGPR(State)) {
8892 // At least one half can be passed via register.
8893 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8894 VA1.getLocVT(), CCValAssign::Full));
8895 } else {
8896 // Both halves must be passed on the stack, with proper alignment.
// Only the first slot honours the original alignment (never less than one
// register width); the second slot follows it with register-width alignment.
8897 Align StackAlign =
8898 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8899 State.addLoc(
8901 State.AllocateStack(GRLenInBytes, StackAlign),
8902 VA1.getLocVT(), CCValAssign::Full));
8903 State.addLoc(CCValAssign::getMem(
8904 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8905 LocVT2, CCValAssign::Full));
8906 return false;
8907 }
8908 if (Register Reg = allocateArgGPR(State)) {
8909 // The second half can also be passed via register.
8910 State.addLoc(
8911 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8912 } else {
8913 // The second half is passed via the stack, without additional alignment.
8914 State.addLoc(CCValAssign::getMem(
8915 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8916 LocVT2, CCValAssign::Full));
8917 }
8918 return false;
8919}
8920
8921// Implements the LoongArch calling convention. Returns true upon failure.
//
// Assigns a single value (argument, or return value when IsRet is set) to a
// register or stack slot and records the decision in State. The visible steps:
//   1. decide whether floating-point values must travel in GPRs,
//   2. for qualifying variadic 2*GRLen-aligned values, skip an odd GPR,
//   3. handle f64 on a 32-bit GRLen as a custom GPR pair / GPR+stack / stack,
//   4. queue the pieces of split integers, passing exactly-two-piece splits
//      directly and longer ones indirectly,
//   5. otherwise allocate from the FPR, vector or GPR list, else the stack.
// The only failure reported is a return value with ValNo greater than 1.
8923 unsigned ValNo, MVT ValVT,
8924 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
8925 CCState &State, bool IsRet, Type *OrigTy) {
8926 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
8927 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
8928 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
8929 MVT LocVT = ValVT;
8930
8931 // Any return value split into more than two values can't be returned
8932 // directly.
8933 if (IsRet && ValNo > 1)
8934 return true;
8935
8936 // If passing a variadic argument, or if no FPR is available.
8937 bool UseGPRForFloat = true;
8938
// The ABI selects whether the default above is kept or replaced by "GPRs only
// for variadic arguments".
8939 switch (ABI) {
8940 default:
8941 llvm_unreachable("Unexpected ABI");
8942 break;
8947 UseGPRForFloat = ArgFlags.isVarArg();
8948 break;
8951 break;
8952 }
8953
8954 // If this is a variadic argument, the LoongArch calling convention requires
8955 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
8956 // byte alignment. An aligned register should be used regardless of whether
8957 // the original argument was split during legalisation or not. The argument
8958 // will not be passed by registers if the original type is larger than
8959 // 2*GRLen, so the register alignment rule does not apply.
8960 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8961 if (ArgFlags.isVarArg() &&
8962 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8963 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8964 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8965 // Skip 'odd' register if necessary.
8966 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8967 State.AllocateReg(ArgGPRs);
8968 }
8969
// Pending state carries the pieces of a split integer across successive calls
// until the piece flagged as the split end arrives.
8970 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8971 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8972 State.getPendingArgFlags();
8973
8974 assert(PendingLocs.size() == PendingArgFlags.size() &&
8975 "PendingLocs and PendingArgFlags out of sync");
8976
// Once every FPR32 argument register is taken, floats fall back to GPRs.
8977 // FPR32 and FPR64 alias each other.
8978 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8979 UseGPRForFloat = true;
8980
8981 if (UseGPRForFloat && ValVT == MVT::f32) {
8982 LocVT = GRLenVT;
8983 LocInfo = CCValAssign::BCvt;
8984 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8985 LocVT = MVT::i64;
8986 LocInfo = CCValAssign::BCvt;
8987 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8988 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8989 // registers are exhausted.
8990 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8991 // Depending on available argument GPRS, f64 may be passed in a pair of
8992 // GPRs, split between a GPR and the stack, or passed completely on the
8993 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8994 // cases.
8995 MCRegister Reg = allocateArgGPR(State);
8996 if (!Reg) {
// No GPR left: the whole f64 takes one 8-byte, 8-aligned stack slot.
8997 int64_t StackOffset = State.AllocateStack(8, Align(8));
8998 State.addLoc(
8999 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9000 return false;
9001 }
// At least the low half is in a GPR: both halves are recorded as custom i32
// locations sharing the same ValNo.
9002 LocVT = MVT::i32;
9003 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9004 MCRegister HiReg = allocateArgGPR(State);
9005 if (HiReg) {
9006 State.addLoc(
9007 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
9008 } else {
9009 int64_t StackOffset = State.AllocateStack(4, Align(4));
9010 State.addLoc(
9011 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9012 }
9013 return false;
9014 }
9015
9016 // Split arguments might be passed indirectly, so keep track of the pending
9017 // values.
9018 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9019 LocVT = GRLenVT;
9020 LocInfo = CCValAssign::Indirect;
9021 PendingLocs.push_back(
9022 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9023 PendingArgFlags.push_back(ArgFlags);
9024 if (!ArgFlags.isSplitEnd()) {
9025 return false;
9026 }
9027 }
9028
9029 // If the split argument only had two elements, it should be passed directly
9030 // in registers or on the stack.
9031 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9032 PendingLocs.size() <= 2) {
9033 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9034 // Apply the normal calling convention rules to the first half of the
9035 // split argument.
9036 CCValAssign VA = PendingLocs[0];
9037 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9038 PendingLocs.clear();
9039 PendingArgFlags.clear();
9040 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
9041 ArgFlags);
9042 }
9043
9044 // Allocate to a register if possible, or else a stack slot.
// Defaults describe a GRLen-sized slot; the vector branches widen them.
9045 Register Reg;
9046 unsigned StoreSizeBytes = GRLen / 8;
9047 Align StackAlign = Align(GRLen / 8);
9048
9049 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9050 Reg = State.AllocateReg(ArgFPR32s);
9051 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9052 Reg = State.AllocateReg(ArgFPR64s);
9053 } else if (ValVT.is128BitVector()) {
9054 Reg = State.AllocateReg(ArgVRs);
9055 UseGPRForFloat = false;
9056 StoreSizeBytes = 16;
9057 StackAlign = Align(16);
9058 } else if (ValVT.is256BitVector()) {
9059 Reg = State.AllocateReg(ArgXRs);
9060 UseGPRForFloat = false;
9061 StoreSizeBytes = 32;
9062 StackAlign = Align(32);
9063 } else {
9064 Reg = allocateArgGPR(State);
9065 }
9066
// A stack slot is reserved only when no register was obtained.
9067 unsigned StackOffset =
9068 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9069
9070 // If we reach this point and PendingLocs is non-empty, we must be at the
9071 // end of a split argument that must be passed indirectly.
9072 if (!PendingLocs.empty()) {
9073 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9074 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
// Every pending piece is pointed at the same register or stack offset.
9075 for (auto &It : PendingLocs) {
9076 if (Reg)
9077 It.convertToReg(Reg);
9078 else
9079 It.convertToMem(StackOffset);
9080 State.addLoc(It);
9081 }
9082 PendingLocs.clear();
9083 PendingArgFlags.clear();
9084 return false;
9085 }
9086 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9087 "Expected an GRLenVT at this stage");
9088
9089 if (Reg) {
9090 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9091 return false;
9092 }
9093
9094 // When a floating-point value is passed on the stack, no bit-cast is needed.
9095 if (ValVT.isFloatingPoint()) {
9096 LocVT = ValVT;
9097 LocInfo = CCValAssign::Full;
9098 }
9099
9100 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9101 return false;
9102}
9103
// Runs the assignment function Fn over every entry of Ins, recording the
// results in CCInfo. With IsRet set, the entries are treated as return values
// and the IR type handed to Fn is the function's return type; otherwise it is
// the parameter type of the originating IR argument, or null when the entry
// does not correspond to one. A failure reported by Fn is fatal.
9104void LoongArchTargetLowering::analyzeInputArgs(
9105 MachineFunction &MF, CCState &CCInfo,
9106 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9107 LoongArchCCAssignFn Fn) const {
9108 FunctionType *FType = MF.getFunction().getFunctionType();
9109 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9110 MVT ArgVT = Ins[i].VT;
9111 Type *ArgTy = nullptr;
9112 if (IsRet)
9113 ArgTy = FType->getReturnType();
9114 else if (Ins[i].isOrigArg())
9115 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9117 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// The loop index doubles as the value number; the LocInfo passed in is always
// Full and Fn adjusts it as needed.
9118 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9119 CCInfo, IsRet, ArgTy)) {
9120 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9121 << '\n');
9122 llvm_unreachable("");
9123 }
9124 }
9125}
9126
// Runs the assignment function Fn over every entry of Outs, recording the
// results in CCInfo. When CLI is supplied, the original IR type of each value
// is looked up through its OrigArgIndex; with a null CLI, Fn receives a null
// type. A failure reported by Fn is fatal.
9127void LoongArchTargetLowering::analyzeOutputArgs(
9128 MachineFunction &MF, CCState &CCInfo,
9129 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9130 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9131 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9132 MVT ArgVT = Outs[i].VT;
9133 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9135 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// The loop index doubles as the value number.
9136 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9137 CCInfo, IsRet, OrigTy)) {
9138 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9139 << "\n");
9140 llvm_unreachable("");
9141 }
9142 }
9143}
9144
9145// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9146// values.
//
// Only the Full and BCvt location kinds are accepted; any other kind is
// treated as a programming error.
9148 const CCValAssign &VA, const SDLoc &DL) {
9149 switch (VA.getLocInfo()) {
9150 default:
9151 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9152 case CCValAssign::Full:
9154 break;
9155 case CCValAssign::BCvt:
// An f32 held in an i64 location cannot use a plain BITCAST because the
// widths differ, so a target-specific GPR-to-FPR move node is emitted.
9156 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9157 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9158 else
9159 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9160 break;
9161 }
9162 return Val;
9163}
9164
9166 const CCValAssign &VA, const SDLoc &DL,
9167 const ISD::InputArg &In,
9168 const LoongArchTargetLowering &TLI) {
// Materialises an incoming register-assigned argument: creates a virtual
// register of the class matching the location type, marks the assigned
// physical register as a live-in mapped to it, copies the value out of it and
// finally converts it from the location type to the value type.
9171 EVT LocVT = VA.getLocVT();
9172 SDValue Val;
9173 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9174 Register VReg = RegInfo.createVirtualRegister(RC);
9175 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9176 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9177
9178 // If input is sign extended from 32 bits, note it for the OptW pass.
9179 if (In.isOrigArg()) {
9180 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9181 if (OrigArg->getType()->isIntegerTy()) {
9182 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9183 // An input zero extended from i31 can also be considered sign extended.
9184 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9185 (BitWidth < 32 && In.Flags.isZExt())) {
9188 LAFI->addSExt32Register(VReg);
9189 }
9190 }
9191 }
9192
9193 return convertLocVTToValVT(DAG, Val, VA, DL);
9194}
9195
9196// The caller is responsible for loading the full value if the argument is
9197// passed with CCValAssign::Indirect.
//
// Loads a stack-assigned incoming argument: a fixed, immutable frame object
// sized by the value type's store size is created at the assigned offset and
// the value is loaded from it with the location type as the result type.
9199 const CCValAssign &VA, const SDLoc &DL) {
9201 MachineFrameInfo &MFI = MF.getFrameInfo();
9202 EVT ValVT = VA.getValVT();
9203 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9204 /*IsImmutable=*/true);
9205 SDValue FIN = DAG.getFrameIndex(
9207
// Every location kind accepted here results in a non-extending load.
9208 ISD::LoadExtType ExtType;
9209 switch (VA.getLocInfo()) {
9210 default:
9211 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9212 case CCValAssign::Full:
9214 case CCValAssign::BCvt:
9215 ExtType = ISD::NON_EXTLOAD;
9216 break;
9217 }
9218 return DAG.getExtLoad(
9219 ExtType, DL, VA.getLocVT(), Chain, FIN,
9221}
9222
9224 const CCValAssign &VA,
9225 const CCValAssign &HiVA,
9226 const SDLoc &DL) {
// Reassembles an incoming f64 that was assigned as two i32 locations. VA
// describes the low half and must be a register; HiVA describes the high
// half, which may be a register or a 4-byte stack slot. The halves are joined
// with a BUILD_PAIR_F64 node.
9227 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9228 "Unexpected VA");
9230 MachineFrameInfo &MFI = MF.getFrameInfo();
9232
9233 assert(VA.isRegLoc() && "Expected register VA assignment");
9234
9235 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9236 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9237 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9238 SDValue Hi;
9239 if (HiVA.isMemLoc()) {
9240 // Second half of f64 is passed on the stack.
9241 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9242 /*IsImmutable=*/true);
9243 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9244 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9246 } else {
9247 // Second half of f64 is passed in another GPR.
9248 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9249 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9250 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9251 }
9252 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9253}
9254
9256 const CCValAssign &VA, const SDLoc &DL) {
// Converts Val from its value type to the location type chosen by the calling
// convention. Full needs no conversion; BCvt reinterprets the bits. Any other
// location kind is treated as a programming error.
9257 EVT LocVT = VA.getLocVT();
9258
9259 switch (VA.getLocInfo()) {
9260 default:
9261 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9262 case CCValAssign::Full:
9263 break;
9264 case CCValAssign::BCvt:
// An f32 going into an i64 location cannot use a plain BITCAST because the
// widths differ, so a target-specific FPR-to-GPR move node is emitted.
9265 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9266 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9267 else
9268 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9269 break;
9270 }
9271 return Val;
9272}
9273
9274static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9275 CCValAssign::LocInfo LocInfo,
9276 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9277 CCState &State) {
9278 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9279 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9280 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9281 static const MCPhysReg GPRList[] = {
9282 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9283 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9284 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9285 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9286 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9287 return false;
9288 }
9289 }
9290
9291 if (LocVT == MVT::f32) {
9292 // Pass in STG registers: F1, F2, F3, F4
9293 // fs0,fs1,fs2,fs3
9294 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9295 LoongArch::F26, LoongArch::F27};
9296 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9297 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9298 return false;
9299 }
9300 }
9301
9302 if (LocVT == MVT::f64) {
9303 // Pass in STG registers: D1, D2, D3, D4
9304 // fs4,fs5,fs6,fs7
9305 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9306 LoongArch::F30_64, LoongArch::F31_64};
9307 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9308 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9309 return false;
9310 }
9311 }
9312
9313 report_fatal_error("No registers left in GHC calling convention");
9314 return true;
9315}
9316
9317// Transform physical registers into virtual registers.
9319 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9320 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9321 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9322
9324
9325 switch (CallConv) {
9326 default:
9327 llvm_unreachable("Unsupported calling convention");
9328 case CallingConv::C:
9329 case CallingConv::Fast:
9332 break;
9333 case CallingConv::GHC:
9334 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9335 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9337 "GHC calling convention requires the F and D extensions");
9338 }
9339
9340 const Function &Func = MF.getFunction();
9341 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9342 MVT GRLenVT = Subtarget.getGRLenVT();
9343 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9344
9345 // Check if this function has any musttail calls. If so, incoming indirect
9346 // arg pointers must be saved in virtual registers so they survive across
9347 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9348 // when needed to avoid adding register pressure to non-musttail functions.
9349 bool HasMusttail = llvm::any_of(Func, [](const BasicBlock &BB) {
9350 return llvm::any_of(BB, [](const Instruction &I) {
9351 if (const auto *CI = dyn_cast<CallInst>(&I))
9352 return CI->isMustTailCall();
9353 return false;
9354 });
9355 });
9356 // Used with varargs to accumulate store chains.
9357 std::vector<SDValue> OutChains;
9358
9359 // Assign locations to all of the incoming arguments.
9361 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9362
9363 if (CallConv == CallingConv::GHC)
9365 else
9366 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9367
9368 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9369 CCValAssign &VA = ArgLocs[i];
9370 SDValue ArgValue;
9371 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9372 // case.
9373 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9374 assert(VA.needsCustom());
9375 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9376 } else if (VA.isRegLoc())
9377 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9378 else
9379 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9380 if (VA.getLocInfo() == CCValAssign::Indirect) {
9381 // If the original argument was split and passed by reference, we need to
9382 // load all parts of it here (using the same address).
9383 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9385 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
9386 if (HasMusttail) {
9389 Register VReg =
9390 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
9391 Chain = DAG.getCopyToReg(Chain, DL, VReg, ArgValue);
9392 LAFI->setIncomingIndirectArg(ArgIndex, VReg);
9393 }
9394 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9395 assert(ArgPartOffset == 0);
9396 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9397 CCValAssign &PartVA = ArgLocs[i + 1];
9398 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9399 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9400 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9401 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9403 ++i;
9404 ++InsIdx;
9405 }
9406 continue;
9407 }
9408 InVals.push_back(ArgValue);
9409 }
9410
9411 if (IsVarArg) {
9413 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9414 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9415 MachineFrameInfo &MFI = MF.getFrameInfo();
9416 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9417 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9418
9419 // Offset of the first variable argument from stack pointer, and size of
9420 // the vararg save area. For now, the varargs save area is either zero or
9421 // large enough to hold a0-a7.
9422 int VaArgOffset, VarArgsSaveSize;
9423
9424 // If all registers are allocated, then all varargs must be passed on the
9425 // stack and we don't need to save any argregs.
9426 if (ArgRegs.size() == Idx) {
9427 VaArgOffset = CCInfo.getStackSize();
9428 VarArgsSaveSize = 0;
9429 } else {
9430 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9431 VaArgOffset = -VarArgsSaveSize;
9432 }
9433
9434 // Record the frame index of the first variable argument
9435 // which is a value necessary to VASTART.
9436 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9437 LoongArchFI->setVarArgsFrameIndex(FI);
9438
9439 // If saving an odd number of registers then create an extra stack slot to
9440 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9441 // offsets to even-numbered registers remain 2*GRLen-aligned.
9442 if (Idx % 2) {
9443 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9444 true);
9445 VarArgsSaveSize += GRLenInBytes;
9446 }
9447
9448 // Copy the integer registers that may have been used for passing varargs
9449 // to the vararg save area.
9450 for (unsigned I = Idx; I < ArgRegs.size();
9451 ++I, VaArgOffset += GRLenInBytes) {
9452 const Register Reg = RegInfo.createVirtualRegister(RC);
9453 RegInfo.addLiveIn(ArgRegs[I], Reg);
9454 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9455 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9456 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9457 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9459 cast<StoreSDNode>(Store.getNode())
9460 ->getMemOperand()
9461 ->setValue((Value *)nullptr);
9462 OutChains.push_back(Store);
9463 }
9464 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9465 }
9466
9467 // All stores are grouped in one node to allow the matching between
9468 // the size of Ins and InVals. This only happens for vararg functions.
9469 if (!OutChains.empty()) {
9470 OutChains.push_back(Chain);
9471 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9472 }
9473
9474 return Chain;
9475}
9476
9478 return CI->isTailCall();
9479}
9480
9481// Check if the return value is used as only a return value, as otherwise
9482// we can't perform a tail-call.
9484 SDValue &Chain) const {
9485 if (N->getNumValues() != 1)
9486 return false;
9487 if (!N->hasNUsesOfValue(1, 0))
9488 return false;
9489
9490 SDNode *Copy = *N->user_begin();
9491 if (Copy->getOpcode() != ISD::CopyToReg)
9492 return false;
9493
9494 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9495 // isn't safe to perform a tail call.
9496 if (Copy->getGluedNode())
9497 return false;
9498
9499 // The copy must be used by a LoongArchISD::RET, and nothing else.
9500 bool HasRet = false;
9501 for (SDNode *Node : Copy->users()) {
9502 if (Node->getOpcode() != LoongArchISD::RET)
9503 return false;
9504 HasRet = true;
9505 }
9506
9507 if (!HasRet)
9508 return false;
9509
9510 Chain = Copy->getOperand(0);
9511 return true;
9512}
9513
9514// Check whether the call is eligible for tail call optimization.
9515bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9516 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9517 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9518
9519 auto CalleeCC = CLI.CallConv;
9520 auto &Outs = CLI.Outs;
9521 auto &Caller = MF.getFunction();
9522 auto CallerCC = Caller.getCallingConv();
9523
9524 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
9525
9526 // Byval parameters hand the function a pointer directly into the stack area
9527 // we want to reuse during a tail call. Working around this *is* possible
9528 // but less efficient and uglier in LowerCall. For musttail, there is no
9529 // workaround today: a byval arg requires a local copy that becomes invalid
9530 // after the tail call deallocates the caller's frame, so rejecting here
9531 // (and triggering reportFatalInternalError in LowerCall) is safer than
9532 // miscompiling.
9533 for (auto &Arg : Outs)
9534 if (Arg.Flags.isByVal())
9535 return false;
9536
9537 // musttail bypasses the remaining checks: the checks either reject cases
9538 // we handle specially (indirect args are forwarded via incoming pointers,
9539 // stack-passed args reuse the matching incoming layout, sret is forwarded
9540 // like any other pointer arg) or are optimizations not applicable to
9541 // mandatory tail calls.
9542 if (IsMustTail)
9543 return true;
9544
9545 // Do not tail call opt if the stack is used to pass parameters.
9546 if (CCInfo.getStackSize() != 0)
9547 return false;
9548
9549 // Do not tail call opt if any parameters need to be passed indirectly.
9550 for (auto &VA : ArgLocs)
9551 if (VA.getLocInfo() == CCValAssign::Indirect)
9552 return false;
9553
9554 // Do not tail call opt if either caller or callee uses struct return
9555 // semantics.
9556 auto IsCallerStructRet = Caller.hasStructRetAttr();
9557 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9558 if (IsCallerStructRet || IsCalleeStructRet)
9559 return false;
9560
9561 // The callee has to preserve all registers the caller needs to preserve.
9562 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9563 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9564 if (CalleeCC != CallerCC) {
9565 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9566 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9567 return false;
9568 }
9569 return true;
9570}
9571
9573 return DAG.getDataLayout().getPrefTypeAlign(
9574 VT.getTypeForEVT(*DAG.getContext()));
9575}
9576
9577// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9578// and output parameter nodes.
9579SDValue
9581 SmallVectorImpl<SDValue> &InVals) const {
9582 SelectionDAG &DAG = CLI.DAG;
9583 SDLoc &DL = CLI.DL;
9585 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9587 SDValue Chain = CLI.Chain;
9588 SDValue Callee = CLI.Callee;
9589 CallingConv::ID CallConv = CLI.CallConv;
9590 bool IsVarArg = CLI.IsVarArg;
9591 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9592 MVT GRLenVT = Subtarget.getGRLenVT();
9593 bool &IsTailCall = CLI.IsTailCall;
9594
9596
9597 // Analyze the operands of the call, assigning locations to each operand.
9599 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9600
9601 if (CallConv == CallingConv::GHC)
9602 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
9603 else
9604 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
9605
9606 // Check if it's really possible to do a tail call.
9607 if (IsTailCall)
9608 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9609
9610 if (IsTailCall)
9611 ++NumTailCalls;
9612 else if (CLI.CB && CLI.CB->isMustTailCall())
9613 report_fatal_error("failed to perform tail call elimination on a call "
9614 "site marked musttail");
9615
9616 // Get a count of how many bytes are to be pushed on the stack.
9617 unsigned NumBytes = ArgCCInfo.getStackSize();
9618
9619 // Create local copies for byval args.
9620 SmallVector<SDValue> ByValArgs;
9621 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9622 ISD::ArgFlagsTy Flags = Outs[i].Flags;
9623 if (!Flags.isByVal())
9624 continue;
9625
9626 SDValue Arg = OutVals[i];
9627 unsigned Size = Flags.getByValSize();
9628 Align Alignment = Flags.getNonZeroByValAlign();
9629
9630 int FI =
9631 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9632 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9633 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
9634
9635 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9636 /*IsVolatile=*/false,
9637 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
9639 ByValArgs.push_back(FIPtr);
9640 }
9641
9642 if (!IsTailCall)
9643 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9644
9645 // Copy argument values to their designated locations.
9647 SmallVector<SDValue> MemOpChains;
9648 SDValue StackPtr;
9649 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
9650 ++i, ++OutIdx) {
9651 CCValAssign &VA = ArgLocs[i];
9652 SDValue ArgValue = OutVals[OutIdx];
9653 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
9654
9655 // Handle passing f64 on LA32D with a soft float ABI as a special case.
9656 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9657 assert(VA.isRegLoc() && "Expected register VA assignment");
9658 assert(VA.needsCustom());
9659 SDValue SplitF64 =
9660 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9661 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9662 SDValue Lo = SplitF64.getValue(0);
9663 SDValue Hi = SplitF64.getValue(1);
9664
9665 Register RegLo = VA.getLocReg();
9666 RegsToPass.push_back(std::make_pair(RegLo, Lo));
9667
9668 // Get the CCValAssign for the Hi part.
9669 CCValAssign &HiVA = ArgLocs[++i];
9670
9671 if (HiVA.isMemLoc()) {
9672 // Second half of f64 is passed on the stack.
9673 if (!StackPtr.getNode())
9674 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9676 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9677 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
9678 // Emit the store.
9679 MemOpChains.push_back(DAG.getStore(
9680 Chain, DL, Hi, Address,
9682 } else {
9683 // Second half of f64 is passed in another GPR.
9684 Register RegHigh = HiVA.getLocReg();
9685 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9686 }
9687 continue;
9688 }
9689
9690 // Promote the value if needed.
9691 // For now, only handle fully promoted and indirect arguments.
9692 if (VA.getLocInfo() == CCValAssign::Indirect) {
9693 // For musttail calls, reuse incoming indirect pointers instead of
9694 // creating new stack temporaries. The incoming pointers point to the
9695 // caller's caller's frame, which remains valid after a tail call.
9696 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
9699 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
9700
9701 // Resolve which formal parameter is being passed at this call
9702 // position.
9703 //
9704 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
9705 // but Outs[].OrigArgIndex is an index into a filtered arg list
9706 // (empty types removed, via CallLoweringInfo in the target-
9707 // independent layer). IncomingIndirectArgs is keyed by the
9708 // caller's unfiltered Argument::getArgNo(), so we have to walk
9709 // the caller's formals (same filter) to translate the index.
9710 // This target-independent asymmetry should be normalized so
9711 // backends do not need to re-derive the mapping.
9712 //
9713 // Steps:
9714 // 1. Find the call operand at filtered position CallArgIdx.
9715 // 2. If it is an Argument, use getArgNo() directly (same filter
9716 // for caller formals and call operands).
9717 // 3. Otherwise (computed value), walk the caller's formals and
9718 // skip empty types to map the filtered index to getArgNo().
9719 const Argument *FormalArg = nullptr;
9720 unsigned FilteredIdx = 0;
9721 for (const auto &CallArg : CLI.CB->args()) {
9722 if (CallArg->getType()->isEmptyTy())
9723 continue;
9724 if (FilteredIdx == CallArgIdx) {
9725 FormalArg = dyn_cast<Argument>(CallArg);
9726 break;
9727 }
9728 ++FilteredIdx;
9729 }
9730
9731 // For forwarded args, getArgNo() gives the unfiltered index directly.
9732 // For computed args, walk the caller's formals to resolve it.
9733 unsigned FormalArgIdx = CallArgIdx;
9734 if (FormalArg) {
9735 FormalArgIdx = FormalArg->getArgNo();
9736 } else {
9737 FilteredIdx = 0;
9738 for (const auto &Arg : MF.getFunction().args()) {
9739 if (Arg.getType()->isEmptyTy())
9740 continue;
9741 if (FilteredIdx == CallArgIdx) {
9742 FormalArgIdx = Arg.getArgNo();
9743 break;
9744 }
9745 ++FilteredIdx;
9746 }
9747 }
9748
9749 Register VReg = LAFI->getIncomingIndirectArg(FormalArgIdx);
9750 SDValue CopyOp = DAG.getCopyFromReg(Chain, DL, VReg, PtrVT);
9751 // Thread the CopyFromReg output chain through MemOpChains so the
9752 // TokenFactor below sequences the copy with any stores we emit
9753 // for this argument.
9754 MemOpChains.push_back(CopyOp.getValue(1));
9755 SDValue IncomingPtr = CopyOp;
9756
9757 if (!FormalArg) {
9758 // Computed value: store into the incoming indirect pointer for the
9759 // same-position formal parameter (musttail guarantees matching
9760 // prototypes, so types match). The pointer survives the tail call
9761 // since it points to the caller's caller's frame.
9762 //
9763 // The data-flow edge through IncomingPtr already prevents the
9764 // store from being scheduled before the CopyFromReg. Threading
9765 // CopyOp.getValue(1) (the copy's output chain) into the store
9766 // makes that ordering explicit on the chain edge as well, which
9767 // is the convention for memory ops chaining off their producers.
9768 MemOpChains.push_back(
9769 DAG.getStore(CopyOp.getValue(1), DL, ArgValue, IncomingPtr,
9771 // Store any split parts at their respective offsets.
9772 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9773 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
9774 SDValue PartValue = OutVals[OutIdx + 1];
9775 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9776 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9777 SDValue Addr =
9778 DAG.getNode(ISD::ADD, DL, PtrVT, IncomingPtr, Offset);
9779 MemOpChains.push_back(
9780 DAG.getStore(CopyOp.getValue(1), DL, PartValue, Addr,
9782 ++i;
9783 ++OutIdx;
9784 }
9785 }
9786 ArgValue = IncomingPtr;
9787
9788 // Skip any remaining split parts (for forwarded args, they are
9789 // covered by the forwarded pointer).
9790 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
9791 ++i;
9792 ++OutIdx;
9793 }
9794 } else {
9795 // Store the argument in a stack slot and pass its address.
9796 Align StackAlign =
9797 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
9798 getPrefTypeAlign(ArgValue.getValueType(), DAG));
9799 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9800 // If the original argument was split and passed by reference, we need
9801 // to store the required parts of it here (and pass just one address).
9802 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
9803 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9804 assert(ArgPartOffset == 0);
9805 // Calculate the total size to store. We don't have access to what we're
9806 // actually storing other than performing the loop and collecting the
9807 // info.
9809 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
9810 SDValue PartValue = OutVals[OutIdx + 1];
9811 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9812 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9813 EVT PartVT = PartValue.getValueType();
9814 StoredSize += PartVT.getStoreSize();
9815 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
9816 Parts.push_back(std::make_pair(PartValue, Offset));
9817 ++i;
9818 ++OutIdx;
9819 }
9820 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
9821 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
9822 MemOpChains.push_back(
9823 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
9825 for (const auto &Part : Parts) {
9826 SDValue PartValue = Part.first;
9827 SDValue PartOffset = Part.second;
9829 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
9830 MemOpChains.push_back(
9831 DAG.getStore(Chain, DL, PartValue, Address,
9833 }
9834 ArgValue = SpillSlot;
9835 }
9836 } else {
9837 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
9838 }
9839
9840 // Use local copy if it is a byval arg.
9841 if (Flags.isByVal())
9842 ArgValue = ByValArgs[j++];
9843
9844 if (VA.isRegLoc()) {
9845 // Queue up the argument copies and emit them at the end.
9846 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
9847 } else {
9848 assert(VA.isMemLoc() && "Argument not register or memory");
9849 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
9850 "Tail call not allowed if stack is used for passing parameters");
9851
9852 // Work out the address of the stack slot.
9853 if (!StackPtr.getNode())
9854 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9856 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9858
9859 // Emit the store.
9860 MemOpChains.push_back(
9861 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
9862 }
9863 }
9864
9865 // Join the stores, which are independent of one another.
9866 if (!MemOpChains.empty())
9867 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
9868
9869 SDValue Glue;
9870
9871 // Build a sequence of copy-to-reg nodes, chained and glued together.
9872 for (auto &Reg : RegsToPass) {
9873 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
9874 Glue = Chain.getValue(1);
9875 }
9876
9877 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
9878 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
9879 // split it and then direct call can be matched by PseudoCALL_SMALL.
9881 const GlobalValue *GV = S->getGlobal();
9882 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
9885 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
9886 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9887 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
9890 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
9891 }
9892
9893 // The first call operand is the chain and the second is the target address.
9895 Ops.push_back(Chain);
9896 Ops.push_back(Callee);
9897
9898 // Add argument registers to the end of the list so that they are
9899 // known live into the call.
9900 for (auto &Reg : RegsToPass)
9901 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
9902
9903 if (!IsTailCall) {
9904 // Add a register mask operand representing the call-preserved registers.
9905 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
9906 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
9907 assert(Mask && "Missing call preserved mask for calling convention");
9908 Ops.push_back(DAG.getRegisterMask(Mask));
9909 }
9910
9911 // Glue the call to the argument copies, if any.
9912 if (Glue.getNode())
9913 Ops.push_back(Glue);
9914
9915 // Emit the call.
9916 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9917 unsigned Op;
9918 switch (DAG.getTarget().getCodeModel()) {
9919 default:
9920 report_fatal_error("Unsupported code model");
9921 case CodeModel::Small:
9922 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
9923 break;
9924 case CodeModel::Medium:
9925 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
9926 break;
9927 case CodeModel::Large:
9928 assert(Subtarget.is64Bit() && "Large code model requires LA64");
9929 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
9930 break;
9931 }
9932
9933 if (IsTailCall) {
9935 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
9936 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
9937 return Ret;
9938 }
9939
9940 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
9941 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
9942 Glue = Chain.getValue(1);
9943
9944 // Mark the end of the call, which is glued to the call itself.
9945 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
9946 Glue = Chain.getValue(1);
9947
9948 // Assign locations to each value returned by this call.
9950 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
9951 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
9952
9953 // Copy all of the result registers out of their specified physreg.
9954 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
9955 auto &VA = RVLocs[i];
9956 // Copy the value out.
9957 SDValue RetValue =
9958 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
9959 // Glue the RetValue to the end of the call sequence.
9960 Chain = RetValue.getValue(1);
9961 Glue = RetValue.getValue(2);
9962
9963 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9964 assert(VA.needsCustom());
9965 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
9966 MVT::i32, Glue);
9967 Chain = RetValue2.getValue(1);
9968 Glue = RetValue2.getValue(2);
9969 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
9970 RetValue, RetValue2);
9971 } else
9972 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
9973
9974 InVals.push_back(RetValue);
9975 }
9976
9977 return Chain;
9978}
9979
9981 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9982 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
9983 const Type *RetTy) const {
9985 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9986
9987 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9988 LoongArchABI::ABI ABI =
9989 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9990 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
9991 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
9992 return false;
9993 }
9994 return true;
9995}
9996
9998 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
10000 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
10001 SelectionDAG &DAG) const {
10002 // Stores the assignment of the return value to a location.
10004
10005 // Info about the registers and stack slot.
10006 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10007 *DAG.getContext());
10008
10009 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10010 nullptr, CC_LoongArch);
10011 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10012 report_fatal_error("GHC functions return void only");
10013 SDValue Glue;
10014 SmallVector<SDValue, 4> RetOps(1, Chain);
10015
10016 // Copy the result values into the output registers.
10017 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10018 SDValue Val = OutVals[OutIdx];
10019 CCValAssign &VA = RVLocs[i];
10020 assert(VA.isRegLoc() && "Can only return in registers!");
10021
10022 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10023 // Handle returning f64 on LA32D with a soft float ABI.
10024 assert(VA.isRegLoc() && "Expected return via registers");
10025 assert(VA.needsCustom());
10026 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10027 DAG.getVTList(MVT::i32, MVT::i32), Val);
10028 SDValue Lo = SplitF64.getValue(0);
10029 SDValue Hi = SplitF64.getValue(1);
10030 Register RegLo = VA.getLocReg();
10031 Register RegHi = RVLocs[++i].getLocReg();
10032
10033 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10034 Glue = Chain.getValue(1);
10035 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10036 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10037 Glue = Chain.getValue(1);
10038 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10039 } else {
10040 // Handle a 'normal' return.
10041 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10042 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10043
10044 // Guarantee that all emitted copies are stuck together.
10045 Glue = Chain.getValue(1);
10046 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10047 }
10048 }
10049
10050 RetOps[0] = Chain; // Update chain.
10051
10052 // Add the glue node if we have it.
10053 if (Glue.getNode())
10054 RetOps.push_back(Glue);
10055
10056 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
10057}
10058
10059// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10060// Note: The following prefixes are excluded:
10061// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10062// as they can be represented using [x]vrepli.[whb]
10064 const APInt &SplatValue, const unsigned SplatBitSize) const {
10065 uint64_t RequiredImm = 0;
10066 uint64_t V = SplatValue.getZExtValue();
10067 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10068 // 4'b0101
10069 RequiredImm = (0b10101 << 8) | (V >> 8);
10070 return {true, RequiredImm};
10071 } else if (SplatBitSize == 32) {
10072 // 4'b0001
10073 if (!(V & 0xFFFF00FF)) {
10074 RequiredImm = (0b10001 << 8) | (V >> 8);
10075 return {true, RequiredImm};
10076 }
10077 // 4'b0010
10078 if (!(V & 0xFF00FFFF)) {
10079 RequiredImm = (0b10010 << 8) | (V >> 16);
10080 return {true, RequiredImm};
10081 }
10082 // 4'b0011
10083 if (!(V & 0x00FFFFFF)) {
10084 RequiredImm = (0b10011 << 8) | (V >> 24);
10085 return {true, RequiredImm};
10086 }
10087 // 4'b0110
10088 if ((V & 0xFFFF00FF) == 0xFF) {
10089 RequiredImm = (0b10110 << 8) | (V >> 8);
10090 return {true, RequiredImm};
10091 }
10092 // 4'b0111
10093 if ((V & 0xFF00FFFF) == 0xFFFF) {
10094 RequiredImm = (0b10111 << 8) | (V >> 16);
10095 return {true, RequiredImm};
10096 }
10097 // 4'b1010
10098 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10099 RequiredImm =
10100 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10101 return {true, RequiredImm};
10102 }
10103 } else if (SplatBitSize == 64) {
10104 // 4'b1011
10105 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10106 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10107 RequiredImm =
10108 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10109 return {true, RequiredImm};
10110 }
10111 // 4'b1100
10112 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10113 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10114 RequiredImm =
10115 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10116 return {true, RequiredImm};
10117 }
10118 // 4'b1001
10119 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10120 uint8_t res = 0;
10121 for (int i = 0; i < 8; ++i) {
10122 uint8_t byte = x & 0xFF;
10123 if (byte == 0 || byte == 0xFF)
10124 res |= ((byte & 1) << i);
10125 else
10126 return {false, 0};
10127 x >>= 8;
10128 }
10129 return {true, res};
10130 };
10131 auto [IsSame, Suffix] = sameBitsPreByte(V);
10132 if (IsSame) {
10133 RequiredImm = (0b11001 << 8) | Suffix;
10134 return {true, RequiredImm};
10135 }
10136 }
10137 return {false, RequiredImm};
10138}
10139
10141 EVT VT) const {
10142 if (!Subtarget.hasExtLSX())
10143 return false;
10144
10145 if (VT == MVT::f32) {
10146 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10147 return (masked == 0x3e000000 || masked == 0x40000000);
10148 }
10149
10150 if (VT == MVT::f64) {
10151 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10152 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10153 }
10154
10155 return false;
10156}
10157
10158bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10159 bool ForCodeSize) const {
10160 // TODO: Maybe need more checks here after vector extension is supported.
10161 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10162 return false;
10163 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10164 return false;
10165 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
10166}
10167
10169 return true;
10170}
10171
10173 return true;
10174}
10175
10176bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10177 const Instruction *I) const {
10178 if (!Subtarget.is64Bit())
10179 return isa<LoadInst>(I) || isa<StoreInst>(I);
10180
10181 if (isa<LoadInst>(I))
10182 return true;
10183
10184 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10185 // require fences beacuse we can use amswap_db.[w/d].
10186 Type *Ty = I->getOperand(0)->getType();
10187 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
10188 unsigned Size = Ty->getIntegerBitWidth();
10189 return (Size == 8 || Size == 16);
10190 }
10191
10192 return false;
10193}
10194
10196 LLVMContext &Context,
10197 EVT VT) const {
10198 if (!VT.isVector())
10199 return getPointerTy(DL);
10201}
10202
10204 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
10205 // Do not merge to float value size (128 or 256 bits) if no implicit
10206 // float attribute is set.
10207 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
10208 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10209 if (NoFloat)
10210 return MemVT.getSizeInBits() <= MaxIntSize;
10211
10212 // Make sure we don't merge greater than our maximum supported vector width.
10213 if (Subtarget.hasExtLASX())
10214 MaxIntSize = 256;
10215 else if (Subtarget.hasExtLSX())
10216 MaxIntSize = 128;
10217
10218 return MemVT.getSizeInBits() <= MaxIntSize;
10219}
10220
10222 EVT VT = Y.getValueType();
10223
10224 if (VT.isVector())
10225 return Subtarget.hasExtLSX() && VT.isInteger();
10226
10227 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
10228}
10229
10232 MachineFunction &MF, unsigned Intrinsic) const {
10233 switch (Intrinsic) {
10234 default:
10235 return;
10236 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10237 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10238 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10239 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10240 IntrinsicInfo Info;
10242 Info.memVT = MVT::i32;
10243 Info.ptrVal = I.getArgOperand(0);
10244 Info.offset = 0;
10245 Info.align = Align(4);
10248 Infos.push_back(Info);
10249 return;
10250 // TODO: Add more Intrinsics later.
10251 }
10252 }
10253}
10254
10255// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10256// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10257// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10258// regression, we need to implement it manually.
10261
10263 Op == AtomicRMWInst::And) &&
10264 "Unable to expand");
10265 unsigned MinWordSize = 4;
10266
10267 IRBuilder<> Builder(AI);
10268 LLVMContext &Ctx = Builder.getContext();
10269 const DataLayout &DL = AI->getDataLayout();
10270 Type *ValueType = AI->getType();
10271 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10272
10273 Value *Addr = AI->getPointerOperand();
10274 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10275 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10276
10277 Value *AlignedAddr = Builder.CreateIntrinsic(
10278 Intrinsic::ptrmask, {PtrTy, IntTy},
10279 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10280 "AlignedAddr");
10281
10282 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10283 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10284 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10285 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
10286 Value *Mask = Builder.CreateShl(
10287 ConstantInt::get(WordType,
10288 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10289 ShiftAmt, "Mask");
10290 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10291 Value *ValOperand_Shifted =
10292 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10293 ShiftAmt, "ValOperand_Shifted");
10294 Value *NewOperand;
10295 if (Op == AtomicRMWInst::And)
10296 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10297 else
10298 NewOperand = ValOperand_Shifted;
10299
10300 AtomicRMWInst *NewAI =
10301 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10302 AI->getOrdering(), AI->getSyncScopeID());
10303
10304 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10305 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10306 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10307 AI->replaceAllUsesWith(FinalOldResult);
10308 AI->eraseFromParent();
10309}
10310
10313 const AtomicRMWInst *AI) const {
10314 // TODO: Add more AtomicRMWInst that needs to be extended.
10315
10316 // Since floating-point operation requires a non-trivial set of data
10317 // operations, use CmpXChg to expand.
10318 if (AI->isFloatingPointOperation() ||
10324
10325 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10328 AI->getOperation() == AtomicRMWInst::Sub)) {
10330 }
10331
10332 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10333 if (Subtarget.hasLAMCAS()) {
10334 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10338 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10340 }
10341
10342 if (Size == 8 || Size == 16)
10345}
10346
10347static Intrinsic::ID
10349 AtomicRMWInst::BinOp BinOp) {
10350 if (GRLen == 64) {
10351 switch (BinOp) {
10352 default:
10353 llvm_unreachable("Unexpected AtomicRMW BinOp");
10355 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10356 case AtomicRMWInst::Add:
10357 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10358 case AtomicRMWInst::Sub:
10359 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10361 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10363 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10365 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10366 case AtomicRMWInst::Max:
10367 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10368 case AtomicRMWInst::Min:
10369 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10370 // TODO: support other AtomicRMWInst.
10371 }
10372 }
10373
10374 if (GRLen == 32) {
10375 switch (BinOp) {
10376 default:
10377 llvm_unreachable("Unexpected AtomicRMW BinOp");
10379 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10380 case AtomicRMWInst::Add:
10381 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10382 case AtomicRMWInst::Sub:
10383 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10385 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10387 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10389 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10390 case AtomicRMWInst::Max:
10391 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10392 case AtomicRMWInst::Min:
10393 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10394 // TODO: support other AtomicRMWInst.
10395 }
10396 }
10397
10398 llvm_unreachable("Unexpected GRLen\n");
10399}
10400
10403 const AtomicCmpXchgInst *CI) const {
10404
10405 if (Subtarget.hasLAMCAS())
10407
10409 if (Size == 8 || Size == 16)
10412}
10413
10415 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10416 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10417 unsigned GRLen = Subtarget.getGRLen();
10418 AtomicOrdering FailOrd = CI->getFailureOrdering();
10419 Value *FailureOrdering =
10420 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10421 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10422 if (GRLen == 64) {
10423 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10424 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10425 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10426 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10427 }
10428 Type *Tys[] = {AlignedAddr->getType()};
10429 Value *Result = Builder.CreateIntrinsic(
10430 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10431 if (GRLen == 64)
10432 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10433 return Result;
10434}
10435
10437 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10438 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10439 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10440 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10441 // mask, as this produces better code than the LL/SC loop emitted by
10442 // int_loongarch_masked_atomicrmw_xchg.
10443 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10446 if (CVal->isZero())
10447 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10448 Builder.CreateNot(Mask, "Inv_Mask"),
10449 AI->getAlign(), Ord);
10450 if (CVal->isMinusOne())
10451 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10452 AI->getAlign(), Ord);
10453 }
10454
10455 unsigned GRLen = Subtarget.getGRLen();
10456 Value *Ordering =
10457 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10458 Type *Tys[] = {AlignedAddr->getType()};
10460 AI->getModule(),
10462
10463 if (GRLen == 64) {
10464 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10465 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10466 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10467 }
10468
10469 Value *Result;
10470
10471 // Must pass the shift amount needed to sign extend the loaded value prior
10472 // to performing a signed comparison for min/max. ShiftAmt is the number of
10473 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10474 // is the number of bits to left+right shift the value in order to
10475 // sign-extend.
10476 if (AI->getOperation() == AtomicRMWInst::Min ||
10478 const DataLayout &DL = AI->getDataLayout();
10479 unsigned ValWidth =
10480 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10481 Value *SextShamt =
10482 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10483 Result = Builder.CreateCall(LlwOpScwLoop,
10484 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10485 } else {
10486 Result =
10487 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10488 }
10489
10490 if (GRLen == 64)
10491 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10492 return Result;
10493}
10494
10496 const MachineFunction &MF, EVT VT) const {
10497 VT = VT.getScalarType();
10498
10499 if (!VT.isSimple())
10500 return false;
10501
10502 switch (VT.getSimpleVT().SimpleTy) {
10503 case MVT::f32:
10504 case MVT::f64:
10505 return true;
10506 default:
10507 break;
10508 }
10509
10510 return false;
10511}
10512
10514 const Constant *PersonalityFn) const {
10515 return LoongArch::R4;
10516}
10517
10519 const Constant *PersonalityFn) const {
10520 return LoongArch::R5;
10521}
10522
10523//===----------------------------------------------------------------------===//
10524// Target Optimization Hooks
10525//===----------------------------------------------------------------------===//
10526
10528 const LoongArchSubtarget &Subtarget) {
10529 // Feature FRECIPE instructions relative accuracy is 2^-14.
10530 // IEEE float has 23 digits and double has 52 digits.
10531 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10532 return RefinementSteps;
10533}
10534
10535static bool
10537 assert(Subtarget.hasFrecipe() &&
10538 "Reciprocal estimate queried on unsupported target");
10539
10540 if (!VT.isSimple())
10541 return false;
10542
10543 switch (VT.getSimpleVT().SimpleTy) {
10544 case MVT::f32:
10545 // f32 is the base type for reciprocal estimate instructions.
10546 return true;
10547
10548 case MVT::f64:
10549 return Subtarget.hasBasicD();
10550
10551 case MVT::v4f32:
10552 case MVT::v2f64:
10553 return Subtarget.hasExtLSX();
10554
10555 case MVT::v8f32:
10556 case MVT::v4f64:
10557 return Subtarget.hasExtLASX();
10558
10559 default:
10560 return false;
10561 }
10562}
10563
10565 SelectionDAG &DAG, int Enabled,
10566 int &RefinementSteps,
10567 bool &UseOneConstNR,
10568 bool Reciprocal) const {
10570 "Enabled should never be Disabled here");
10571
10572 if (!Subtarget.hasFrecipe())
10573 return SDValue();
10574
10575 SDLoc DL(Operand);
10576 EVT VT = Operand.getValueType();
10577
10578 // Check supported types.
10579 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10580 return SDValue();
10581
10582 // Handle refinement steps.
10583 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10584 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10585
10586 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
10587 UseOneConstNR = false;
10588 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10589
10590 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10591 // are needed (which rely on the reciprocal form), return the raw reciprocal
10592 // estimate.
10593 if (Reciprocal || RefinementSteps > 0)
10594 return Rsqrt;
10595
10596 // Otherwise, return sqrt(x) by multiplying with the operand.
10597 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
10598}
10599
10601 SelectionDAG &DAG,
10602 int Enabled,
10603 int &RefinementSteps) const {
10605 "Enabled should never be Disabled here");
10606
10607 if (!Subtarget.hasFrecipe())
10608 return SDValue();
10609
10610 SDLoc DL(Operand);
10611 EVT VT = Operand.getValueType();
10612
10613 // Check supported types.
10614 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10615 return SDValue();
10616
10617 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10618 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10619
10620 // FRECIPE computes 1.0 / x.
10621 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
10622}
10623
10624//===----------------------------------------------------------------------===//
10625// LoongArch Inline Assembly Support
10626//===----------------------------------------------------------------------===//
10627
10629LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
10630 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
10631 //
10632 // 'f': A floating-point register (if available).
10633 // 'k': A memory operand whose address is formed by a base register and
10634 // (optionally scaled) index register.
10635 // 'l': A signed 16-bit constant.
10636 // 'm': A memory operand whose address is formed by a base register and
10637 // offset that is suitable for use in instructions with the same
10638 // addressing mode as st.w and ld.w.
10639 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
10640 // instruction)
10641 // 'I': A signed 12-bit constant (for arithmetic instructions).
10642 // 'J': Integer zero.
10643 // 'K': An unsigned 12-bit constant (for logic instructions).
10644 // "ZB": An address that is held in a general-purpose register. The offset is
10645 // zero.
10646 // "ZC": A memory operand whose address is formed by a base register and
10647 // offset that is suitable for use in instructions with the same
10648 // addressing mode as ll.w and sc.w.
10649 if (Constraint.size() == 1) {
10650 switch (Constraint[0]) {
10651 default:
10652 break;
10653 case 'f':
10654 case 'q':
10655 return C_RegisterClass;
10656 case 'l':
10657 case 'I':
10658 case 'J':
10659 case 'K':
10660 return C_Immediate;
10661 case 'k':
10662 return C_Memory;
10663 }
10664 }
10665
10666 if (Constraint == "ZC" || Constraint == "ZB")
10667 return C_Memory;
10668
10669 // 'm' is handled here.
10670 return TargetLowering::getConstraintType(Constraint);
10671}
10672
10673InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
10674 StringRef ConstraintCode) const {
10675 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
10679 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
10680}
10681
10682std::pair<unsigned, const TargetRegisterClass *>
10683LoongArchTargetLowering::getRegForInlineAsmConstraint(
10684 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
10685 // First, see if this is a constraint that directly corresponds to a LoongArch
10686 // register class.
10687 if (Constraint.size() == 1) {
10688 switch (Constraint[0]) {
10689 case 'r':
10690 // TODO: Support fixed vectors up to GRLen?
10691 if (VT.isVector())
10692 break;
10693 return std::make_pair(0U, &LoongArch::GPRRegClass);
10694 case 'q':
10695 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
10696 case 'f':
10697 if (Subtarget.hasBasicF() && VT == MVT::f32)
10698 return std::make_pair(0U, &LoongArch::FPR32RegClass);
10699 if (Subtarget.hasBasicD() && VT == MVT::f64)
10700 return std::make_pair(0U, &LoongArch::FPR64RegClass);
10701 if (Subtarget.hasExtLSX() &&
10702 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
10703 return std::make_pair(0U, &LoongArch::LSX128RegClass);
10704 if (Subtarget.hasExtLASX() &&
10705 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
10706 return std::make_pair(0U, &LoongArch::LASX256RegClass);
10707 break;
10708 default:
10709 break;
10710 }
10711 }
10712
10713 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
10714 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
10715 // constraints while the official register name is prefixed with a '$'. So we
10716 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
10717 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
10718 // case insensitive, so no need to convert the constraint to upper case here.
10719 //
10720 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
10721 // decode the usage of register name aliases into their official names. And
10722 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
10723 // official register names.
10724 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
10725 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
10726 bool IsFP = Constraint[2] == 'f';
10727 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
10728 std::pair<unsigned, const TargetRegisterClass *> R;
10730 TRI, join_items("", Temp.first, Temp.second), VT);
10731 // Match those names to the widest floating point register type available.
10732 if (IsFP) {
10733 unsigned RegNo = R.first;
10734 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
10735 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
10736 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
10737 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
10738 }
10739 }
10740 }
10741 return R;
10742 }
10743
10744 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10745}
10746
10747void LoongArchTargetLowering::LowerAsmOperandForConstraint(
10748 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
10749 SelectionDAG &DAG) const {
10750 // Currently only support length 1 constraints.
10751 if (Constraint.size() == 1) {
10752 switch (Constraint[0]) {
10753 case 'l':
10754 // Validate & create a 16-bit signed immediate operand.
10755 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10756 uint64_t CVal = C->getSExtValue();
10757 if (isInt<16>(CVal))
10758 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10759 Subtarget.getGRLenVT()));
10760 }
10761 return;
10762 case 'I':
10763 // Validate & create a 12-bit signed immediate operand.
10764 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10765 uint64_t CVal = C->getSExtValue();
10766 if (isInt<12>(CVal))
10767 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10768 Subtarget.getGRLenVT()));
10769 }
10770 return;
10771 case 'J':
10772 // Validate & create an integer zero operand.
10773 if (auto *C = dyn_cast<ConstantSDNode>(Op))
10774 if (C->getZExtValue() == 0)
10775 Ops.push_back(
10776 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
10777 return;
10778 case 'K':
10779 // Validate & create a 12-bit unsigned immediate operand.
10780 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10781 uint64_t CVal = C->getZExtValue();
10782 if (isUInt<12>(CVal))
10783 Ops.push_back(
10784 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
10785 }
10786 return;
10787 default:
10788 break;
10789 }
10790 }
10792}
10793
10794#define GET_REGISTER_MATCHER
10795#include "LoongArchGenAsmMatcher.inc"
10796
10799 const MachineFunction &MF) const {
10800 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
10801 std::string NewRegName = Name.second.str();
10802 Register Reg = MatchRegisterAltName(NewRegName);
10803 if (!Reg)
10804 Reg = MatchRegisterName(NewRegName);
10805 if (!Reg)
10806 return Reg;
10807 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
10808 if (!ReservedRegs.test(Reg))
10809 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
10810 StringRef(RegName) + "\"."));
10811 return Reg;
10812}
10813
10815 EVT VT, SDValue C) const {
10816 // TODO: Support vectors.
10817 if (!VT.isScalarInteger())
10818 return false;
10819
10820 // Omit the optimization if the data size exceeds GRLen.
10821 if (VT.getSizeInBits() > Subtarget.getGRLen())
10822 return false;
10823
10824 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10825 const APInt &Imm = ConstNode->getAPIntValue();
10826 // Break MUL into (SLLI + ADD/SUB) or ALSL.
10827 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10828 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10829 return true;
10830 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
10831 if (ConstNode->hasOneUse() &&
10832 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
10833 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
10834 return true;
10835 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
10836 // in which the immediate has two set bits. Or Break (MUL x, imm)
10837 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
10838 // equals to (1 << s0) - (1 << s1).
10839 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
10840 unsigned Shifts = Imm.countr_zero();
10841 // Reject immediates which can be composed via a single LUI.
10842 if (Shifts >= 12)
10843 return false;
10844 // Reject multiplications that can be optimized to
10845 // (SLLI (ALSL x, x, 1/2/3/4), s).
10846 APInt ImmPop = Imm.ashr(Shifts);
10847 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
10848 return false;
10849 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
10850 // since it needs one more instruction than other 3 cases.
10851 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
10852 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
10853 (ImmSmall - Imm).isPowerOf2())
10854 return true;
10855 }
10856 }
10857
10858 return false;
10859}
10860
10862 const AddrMode &AM,
10863 Type *Ty, unsigned AS,
10864 Instruction *I) const {
10865 // LoongArch has four basic addressing modes:
10866 // 1. reg
10867 // 2. reg + 12-bit signed offset
10868 // 3. reg + 14-bit signed offset left-shifted by 2
10869 // 4. reg1 + reg2
10870 // TODO: Add more checks after support vector extension.
10871
10872 // No global is ever allowed as a base.
10873 if (AM.BaseGV)
10874 return false;
10875
10876 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
10877 // with `UAL` feature.
10878 if (!isInt<12>(AM.BaseOffs) &&
10879 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
10880 return false;
10881
10882 switch (AM.Scale) {
10883 case 0:
10884 // "r+i" or just "i", depending on HasBaseReg.
10885 break;
10886 case 1:
10887 // "r+r+i" is not allowed.
10888 if (AM.HasBaseReg && AM.BaseOffs)
10889 return false;
10890 // Otherwise we have "r+r" or "r+i".
10891 break;
10892 case 2:
10893 // "2*r+r" or "2*r+i" is not allowed.
10894 if (AM.HasBaseReg || AM.BaseOffs)
10895 return false;
10896 // Allow "2*r" as "r+r".
10897 break;
10898 default:
10899 return false;
10900 }
10901
10902 return true;
10903}
10904
10906 return isInt<12>(Imm);
10907}
10908
10910 return isInt<12>(Imm);
10911}
10912
10914 // Zexts are free if they can be combined with a load.
10915 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
10916 // poorly with type legalization of compares preferring sext.
10917 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
10918 EVT MemVT = LD->getMemoryVT();
10919 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
10920 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
10921 LD->getExtensionType() == ISD::ZEXTLOAD))
10922 return true;
10923 }
10924
10925 return TargetLowering::isZExtFree(Val, VT2);
10926}
10927
10929 EVT DstVT) const {
10930 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
10931}
10932
10934 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
10935}
10936
10938 // TODO: Support vectors.
10939 if (Y.getValueType().isVector())
10940 return false;
10941
10942 return !isa<ConstantSDNode>(Y);
10943}
10944
10946 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
10947 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
10948}
10949
10951 Type *Ty, bool IsSigned) const {
10952 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
10953 return true;
10954
10955 return IsSigned;
10956}
10957
10959 // Return false to suppress the unnecessary extensions if the LibCall
10960 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
10961 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
10962 Type.getSizeInBits() < Subtarget.getGRLen()))
10963 return false;
10964 return true;
10965}
10966
10967// memcpy, and other memory intrinsics, typically tries to use wider load/store
10968// if the source/dest is aligned and the copy size is large enough. We therefore
10969// want to align such objects passed to memory intrinsics.
10971 unsigned &MinSize,
10972 Align &PrefAlign) const {
10973 if (!isa<MemIntrinsic>(CI))
10974 return false;
10975
10976 if (Subtarget.is64Bit()) {
10977 MinSize = 8;
10978 PrefAlign = Align(8);
10979 } else {
10980 MinSize = 4;
10981 PrefAlign = Align(4);
10982 }
10983
10984 return true;
10985}
10986
10989 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
10990 VT.getVectorElementType() != MVT::i1)
10991 return TypeWidenVector;
10992
10994}
10995
10996bool LoongArchTargetLowering::splitValueIntoRegisterParts(
10997 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10998 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
10999 bool IsABIRegCopy = CC.has_value();
11000 EVT ValueVT = Val.getValueType();
11001
11002 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11003 PartVT == MVT::f32) {
11004 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11005 // nan, and cast to f32.
11006 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11007 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11008 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11009 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11010 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11011 Parts[0] = Val;
11012 return true;
11013 }
11014
11015 return false;
11016}
11017
11018SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11019 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11020 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
11021 bool IsABIRegCopy = CC.has_value();
11022
11023 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11024 PartVT == MVT::f32) {
11025 SDValue Val = Parts[0];
11026
11027 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11028 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11029 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11030 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
11031 return Val;
11032 }
11033
11034 return SDValue();
11035}
11036
11037MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11038 CallingConv::ID CC,
11039 EVT VT) const {
11040 // Use f32 to pass f16.
11041 if (VT == MVT::f16 && Subtarget.hasBasicF())
11042 return MVT::f32;
11043
11045}
11046
11047unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11048 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11049 // Use f32 to pass f16.
11050 if (VT == MVT::f16 && Subtarget.hasBasicF())
11051 return 1;
11052
11054}
11055
11057 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11058 const SelectionDAG &DAG, unsigned Depth) const {
11059 unsigned Opc = Op.getOpcode();
11060 Known.resetAll();
11061 switch (Opc) {
11062 default:
11063 break;
11064 case LoongArchISD::VPICK_ZEXT_ELT: {
11065 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11066 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
11067 unsigned VTBits = VT.getScalarSizeInBits();
11068 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
11069 Known.Zero.setBitsFrom(VTBits);
11070 break;
11071 }
11072 }
11073}
11074
11076 SDValue Op, const APInt &OriginalDemandedBits,
11077 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11078 unsigned Depth) const {
11079 EVT VT = Op.getValueType();
11080 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11081 unsigned Opc = Op.getOpcode();
11082 switch (Opc) {
11083 default:
11084 break;
11085 case LoongArchISD::VMSKLTZ:
11086 case LoongArchISD::XVMSKLTZ: {
11087 SDValue Src = Op.getOperand(0);
11088 MVT SrcVT = Src.getSimpleValueType();
11089 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11090 unsigned NumElts = SrcVT.getVectorNumElements();
11091
11092 // If we don't need the sign bits at all just return zero.
11093 if (OriginalDemandedBits.countr_zero() >= NumElts)
11094 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
11095
11096 // Only demand the vector elements of the sign bits we need.
11097 APInt KnownUndef, KnownZero;
11098 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
11099 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
11100 TLO, Depth + 1))
11101 return true;
11102
11103 Known.Zero = KnownZero.zext(BitWidth);
11104 Known.Zero.setHighBits(BitWidth - NumElts);
11105
11106 // [X]VMSKLTZ only uses the MSB from each vector element.
11107 KnownBits KnownSrc;
11108 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
11109 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
11110 Depth + 1))
11111 return true;
11112
11113 if (KnownSrc.One[SrcBits - 1])
11114 Known.One.setLowBits(NumElts);
11115 else if (KnownSrc.Zero[SrcBits - 1])
11116 Known.Zero.setLowBits(NumElts);
11117
11118 // Attempt to avoid multi-use ops if we don't need anything from it.
11120 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
11121 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
11122 return false;
11123 }
11124 }
11125
11127 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
11128}
11129
// Body of a bool-returning predicate that decides whether the vector binary
// operation VecOp should be converted to a scalar operation.
// NOTE(review): the signature line (listing line 11130) is missing from this
// extract - confirm the declaration against the upstream file.
11131 unsigned Opc = VecOp.getOpcode();
11132
11133 // Assume target opcodes can't be scalarized.
11134 // TODO - do we have any exceptions?
11135 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
11136 return false;
11137
11138 // If the vector op is not supported, try to convert to scalar.
11139 EVT VecVT = VecOp.getValueType();
// NOTE(review): listing line 11140 (the condition guarding the return below)
// is missing from this extract; presumably it tests whether Opc is supported
// for VecVT - verify against the upstream file before relying on this.
11141 return true;
11142
11143 // If the vector op is supported, but the scalar op is not, the transform may
11144 // not be worthwhile.
11145 EVT ScalarVT = VecVT.getScalarType();
// The final answer is whether Opc is legal, custom or promoted for the scalar
// element type.
11146 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
11147}
11148
// Tail of a predicate that, judging by the comment below, reports whether
// extracting a subvector starting at Index is cheap.
// NOTE(review): listing lines 11149 (start of the signature) and 11151 (the
// condition guarding the `return false` below) are missing from this extract -
// confirm both against the upstream file.
11150 unsigned Index) const {
11152 return false;
11153
11154 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
11155 return Index == 0;
11156}
11157
// Tail of a predicate that returns true only when the element type of VT is
// f32 or f64 and Index is 0.
// NOTE(review): listing line 11158 (start of the signature, which declares VT)
// is missing from this extract - confirm it against the upstream file.
11159 unsigned Index) const {
11160 EVT EltVT = VT.getScalarType();
11161
11162 // Extracting a scalar FP value from index 0 of a vector is free.
11163 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11164}
11165
// Returns true only when the function behind MF carries a "probe-stack"
// attribute whose string value is exactly "inline-asm"; returns false when the
// attribute is absent or has any other value.
// NOTE(review): listing line 11166 (start of the signature) is missing from
// this extract - confirm it against the upstream file.
11167 const MachineFunction &MF) const {
11168
11169 // If the function specifically requests inline stack probes, emit them.
11170 if (MF.getFunction().hasFnAttribute("probe-stack"))
11171 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11172 "inline-asm";
11173
11174 return false;
11175}
11176
// Computes the stack probe interval for the function behind MF: the value of
// its "stack-probe-size" attribute (4096 when absent), rounded down to a
// multiple of StackAlign.
// NOTE(review): listing line 11177 (start of the signature, which declares MF)
// is missing from this extract - confirm it against the upstream file.
11178 Align StackAlign) const {
11179 // The default stack probe size is 4096 if the function has no
11180 // stack-probe-size attribute.
11181 const Function &Fn = MF.getFunction();
// NOTE(review): getFnAttributeAsParsedInteger returns uint64_t, so storing it
// in `unsigned` narrows any attribute value that does not fit in unsigned.
11182 unsigned StackProbeSize =
11183 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
11184 // Round down to the stack alignment.
11185 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
// If rounding down produced 0 (requested size smaller than the alignment),
// fall back to the stack alignment itself so the result is never zero.
11186 return StackProbeSize ? StackProbeSize : StackAlign.value();
11187}
11188
// Lowers a dynamic stack allocation node when inline stack probing is
// requested for the current function.
//
// Op's operands are read as: 0 = chain, 1 = allocation size, 2 = a constant
// from which an optional alignment is taken.
//
// Returns a default-constructed SDValue when hasInlineStackProbe(MF) is false.
// Otherwise returns the merged pair {new SP value, output chain}, where the
// chain comes from a LoongArchISD::PROBED_ALLOCA node built on the new SP.
11189SDValue
11190LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11191 SelectionDAG &DAG) const {
// NOTE(review): listing line 11192 (which must declare `MF`, used below) is
// missing from this extract - confirm it against the upstream file.
11193 if (!hasInlineStackProbe(MF))
11194 return SDValue();
11195
11196 const MVT GRLenVT = Subtarget.getGRLenVT();
11197 // Get the inputs.
11198 SDValue Chain = Op.getOperand(0);
11199 SDValue Size = Op.getOperand(1);
11200
// Note: this local is named `Align`, the same spelling as the Align type.
11201 const MaybeAlign Align =
11202 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11203 const SDLoc dl(Op);
11204 const EVT VT = Op.getValueType();
11205
11206 // Construct the new SP value in a GPR.
// Read the current stack pointer (R3) and continue on the chain it produces.
11207 SDValue SP = DAG.getCopyFromReg(Chain, dl, LoongArch::R3, GRLenVT);
11208 Chain = SP.getValue(1);
11209 SP = DAG.getNode(ISD::SUB, dl, GRLenVT, SP, Size);
// When an alignment was supplied, AND with the negated alignment value to
// round the new SP down to that alignment.
11210 if (Align)
11211 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11212 DAG.getSignedConstant(-Align->value(), dl, VT));
11213
11214 // Set the real SP to the new value with a probing loop.
11215 Chain = DAG.getNode(LoongArchISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
11216 return DAG.getMergeValues({SP, Chain}, dl);
11217}
11218
// Expands the instruction MI (whose operand 0 is the register holding the
// target stack pointer value) into an explicit stack-probing loop.
//
// Two new blocks are created and inserted immediately after MBB, in the order
// LoopTestMBB then ExitMBB:
//   MBB:         ScratchReg = ProbeSize
//   LoopTestMBB: $sp -= ScratchReg; store zero at 0($sp);
//                branch back to LoopTestMBB while TargetReg < $sp (unsigned)
//   ExitMBB:     $sp = TargetReg, followed by everything that came after MI
//                in MBB
// MI is erased, the function is flagged via setDynamicAllocation(), and the
// parent block of ExitMBB's first instruction is returned.
// NOTE(review): listing lines 11219-11220 (start of the signature, which
// declares MI) are missing from this extract - confirm against upstream.
11221 MachineBasicBlock *MBB) const {
11222 MachineFunction &MF = *MBB->getParent();
11223 MachineBasicBlock::iterator MBBI = MI.getIterator();
11224 DebugLoc DL = MBB->findDebugLoc(MBBI);
11225 const Register TargetReg = MI.getOperand(0).getReg();
11226
11227 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11228 const bool IsLA64 = Subtarget.is64Bit();
11229 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11230 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11231 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11232
// Both new blocks are inserted before the block that originally followed MBB,
// so the layout becomes MBB, LoopTestMBB, ExitMBB.
11233 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
11234 MachineBasicBlock *const LoopTestMBB =
11235 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11236 MF.insert(MBBInsertPoint, LoopTestMBB);
11237 MachineBasicBlock *const ExitMBB =
11238 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11239 MF.insert(MBBInsertPoint, ExitMBB);
11240 const Register SPReg = LoongArch::R3;
11241 const Register ScratchReg =
11242 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
11243
11244 // ScratchReg = ProbeSize
// Emitted in MBB at MI's position, i.e. before the loop is entered.
11245 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
11246
11247 // LoopTest:
11248 // sub.{w/d} $sp, $sp, ScratchReg
11249 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11250 TII->get(IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), SPReg)
11251 .addReg(SPReg)
11252 .addReg(ScratchReg);
11253
11254 // st.{w/d} $zero, $sp, 0
11255 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11256 TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11257 .addReg(LoongArch::R0)
11258 .addReg(SPReg)
11259 .addImm(0);
11260
11261 // bltu TargetReg, $sp, LoopTest
11262 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(LoongArch::BLTU))
11263 .addReg(TargetReg)
11264 .addReg(SPReg)
11265 .addMBB(LoopTestMBB);
11266
11267 // move $sp, TargetReg
// Implemented as `or $sp, TargetReg, $zero`.
11268 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(LoongArch::OR), SPReg)
11269 .addReg(TargetReg)
11270 .addReg(LoongArch::R0);
11271
// Move every instruction that followed MI in MBB to the end of ExitMBB.
11272 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
// NOTE(review): listing line 11273 is missing from this extract; presumably
// it hands MBB's original successors over to ExitMBB - verify against the
// upstream file.
11274
// CFG edges: MBB -> LoopTestMBB, LoopTestMBB -> {ExitMBB, LoopTestMBB}.
11275 LoopTestMBB->addSuccessor(ExitMBB);
11276 LoopTestMBB->addSuccessor(LoopTestMBB);
11277 MBB->addSuccessor(LoopTestMBB);
11278
11279 MI.eraseFromParent();
11280 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11281 return ExitMBB->begin()->getParent();
11282}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative register names to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1521
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition Argument.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
iterator_range< arg_iterator > args()
Definition Function.h:892
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2868
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
void setIncomingIndirectArg(unsigned ArgIndex, Register Reg)
Register getIncomingIndirectArg(unsigned ArgIndex) const
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts a new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...