LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
407
413
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
421 Legal);
423 VT, Legal);
430 Expand);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
459 VT, Expand);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
473 }
474
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
485 }
486
487 // Set DAG combine for 'LASX' feature.
488 if (Subtarget.hasExtLASX()) {
492 }
493
494 // Compute derived properties from the register classes.
495 computeRegisterProperties(Subtarget.getRegisterInfo());
496
498
501
502 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
503
505
506 // Function alignments.
508 // Set preferred alignments.
509 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
510 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
511 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
512
513 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
514 if (Subtarget.hasLAMCAS())
516
517 if (Subtarget.hasSCQ()) {
520 }
521
522 // Disable strict node mutation.
523 IsStrictFPEnabled = true;
524}
525
527 const GlobalAddressSDNode *GA) const {
528 // In order to maximise the opportunity for common subexpression elimination,
529 // keep a separate ADD node for the global address offset instead of folding
530 // it in the global address node. Later peephole optimisations may choose to
531 // fold it back in when profitable.
532 return false;
533}
534
536 SelectionDAG &DAG) const {
537 switch (Op.getOpcode()) {
539 return lowerATOMIC_FENCE(Op, DAG);
541 return lowerEH_DWARF_CFA(Op, DAG);
543 return lowerGlobalAddress(Op, DAG);
545 return lowerGlobalTLSAddress(Op, DAG);
547 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
549 return lowerINTRINSIC_W_CHAIN(Op, DAG);
551 return lowerINTRINSIC_VOID(Op, DAG);
553 return lowerBlockAddress(Op, DAG);
554 case ISD::JumpTable:
555 return lowerJumpTable(Op, DAG);
556 case ISD::SHL_PARTS:
557 return lowerShiftLeftParts(Op, DAG);
558 case ISD::SRA_PARTS:
559 return lowerShiftRightParts(Op, DAG, true);
560 case ISD::SRL_PARTS:
561 return lowerShiftRightParts(Op, DAG, false);
563 return lowerConstantPool(Op, DAG);
564 case ISD::FP_TO_SINT:
565 return lowerFP_TO_SINT(Op, DAG);
566 case ISD::BITCAST:
567 return lowerBITCAST(Op, DAG);
568 case ISD::UINT_TO_FP:
569 return lowerUINT_TO_FP(Op, DAG);
570 case ISD::SINT_TO_FP:
571 return lowerSINT_TO_FP(Op, DAG);
572 case ISD::VASTART:
573 return lowerVASTART(Op, DAG);
574 case ISD::FRAMEADDR:
575 return lowerFRAMEADDR(Op, DAG);
576 case ISD::RETURNADDR:
577 return lowerRETURNADDR(Op, DAG);
579 return lowerWRITE_REGISTER(Op, DAG);
581 return lowerINSERT_VECTOR_ELT(Op, DAG);
583 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
585 return lowerBUILD_VECTOR(Op, DAG);
587 return lowerCONCAT_VECTORS(Op, DAG);
589 return lowerVECTOR_SHUFFLE(Op, DAG);
590 case ISD::BITREVERSE:
591 return lowerBITREVERSE(Op, DAG);
593 return lowerSCALAR_TO_VECTOR(Op, DAG);
594 case ISD::PREFETCH:
595 return lowerPREFETCH(Op, DAG);
596 case ISD::SELECT:
597 return lowerSELECT(Op, DAG);
598 case ISD::BRCOND:
599 return lowerBRCOND(Op, DAG);
600 case ISD::FP_TO_FP16:
601 return lowerFP_TO_FP16(Op, DAG);
602 case ISD::FP16_TO_FP:
603 return lowerFP16_TO_FP(Op, DAG);
604 case ISD::FP_TO_BF16:
605 return lowerFP_TO_BF16(Op, DAG);
606 case ISD::BF16_TO_FP:
607 return lowerBF16_TO_FP(Op, DAG);
609 return lowerVECREDUCE_ADD(Op, DAG);
610 case ISD::ROTL:
611 case ISD::ROTR:
612 return lowerRotate(Op, DAG);
620 return lowerVECREDUCE(Op, DAG);
621 case ISD::ConstantFP:
622 return lowerConstantFP(Op, DAG);
623 case ISD::SETCC:
624 return lowerSETCC(Op, DAG);
625 }
626 return SDValue();
627}
628
629// Helper to attempt to return a cheaper, bit-inverted version of \p V.
631 // TODO: don't always ignore oneuse constraints.
632 V = peekThroughBitcasts(V);
633 EVT VT = V.getValueType();
634
635 // Match not(xor X, -1) -> X.
636 if (V.getOpcode() == ISD::XOR &&
637 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
638 isAllOnesConstant(V.getOperand(1))))
639 return V.getOperand(0);
640
641 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
642 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
643 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
644 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
645 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
646 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
647 V.getOperand(1));
648 }
649 }
650
651 // Match not(SplatVector(not(X)) -> SplatVector(X).
652 if (V.getOpcode() == ISD::BUILD_VECTOR) {
653 if (SDValue SplatValue =
654 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
655 if (!V->isOnlyUserOf(SplatValue.getNode()))
656 return SDValue();
657
658 if (SDValue Not = isNOT(SplatValue, DAG)) {
659 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
660 return DAG.getSplat(VT, SDLoc(Not), Not);
661 }
662 }
663 }
664
665 // Match not(or(not(X),not(Y))) -> and(X, Y).
666 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
667 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
668 // TODO: Handle cases with single NOT operand -> VANDN
669 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
670 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
671 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
672 DAG.getBitcast(VT, Op1));
673 }
674
675 // TODO: Add more matching patterns. Such as,
676 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
677 // not(slt(C, X)) -> slt(X - 1, C)
678
679 return SDValue();
680}
681
682SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
683 SelectionDAG &DAG) const {
684 EVT VT = Op.getValueType();
685 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
686 const APFloat &FPVal = CFP->getValueAPF();
687 SDLoc DL(CFP);
688
689 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
690 (VT == MVT::f64 && Subtarget.hasBasicD()));
691
692 // If value is 0.0 or -0.0, just ignore it.
693 if (FPVal.isZero())
694 return SDValue();
695
696 // If lsx enabled, use cheaper 'vldi' instruction if possible.
697 if (isFPImmVLDILegal(FPVal, VT))
698 return SDValue();
699
700 // Construct as integer, and move to float register.
701 APInt INTVal = FPVal.bitcastToAPInt();
702
703 // If more than MaterializeFPImmInsNum instructions will be used to
704 // generate the INTVal and move it to float register, fallback to
705 // use floating point load from the constant pool.
707 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
708 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
709 return SDValue();
710
711 switch (VT.getSimpleVT().SimpleTy) {
712 default:
713 llvm_unreachable("Unexpected floating point type!");
714 break;
715 case MVT::f32: {
716 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
717 if (Subtarget.is64Bit())
718 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
719 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
720 : LoongArchISD::MOVGR2FR_W,
721 DL, VT, NewVal);
722 }
723 case MVT::f64: {
724 if (Subtarget.is64Bit()) {
725 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
726 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
727 }
728 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
729 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
730 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
731 }
732 }
733
734 return SDValue();
735}
736
737// Ensure SETCC result and operand have the same bit width; isel does not
738// support mismatched widths.
739SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
740 SelectionDAG &DAG) const {
741 SDLoc DL(Op);
742 EVT ResultVT = Op.getValueType();
743 EVT OperandVT = Op.getOperand(0).getValueType();
744
745 EVT SetCCResultVT =
746 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
747
748 if (ResultVT == SetCCResultVT)
749 return Op;
750
751 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
752 "SETCC operands must have the same type!");
753
754 SDValue SetCCNode =
755 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
756 Op.getOperand(1), Op.getOperand(2));
757
758 if (ResultVT.bitsGT(SetCCResultVT))
759 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
760 else if (ResultVT.bitsLT(SetCCResultVT))
761 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
762
763 return SetCCNode;
764}
765
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
// can be lowered to:
// VHADDW_D_W vr0, vr0, vr0
// VHADDW_Q_D vr0, vr0, vr0
// VPICKVE2GR_D a0, vr0, 0
// ADDI_W a0, a0, 0
SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  // Element count / width of the (possibly illegal) input vector, and the
  // scalar width of the reduction result.
  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
  unsigned ResBits = OpVT.getScalarSizeInBits();

  unsigned LegalVecSize = 128;
  bool isLASX256Vector =
      Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;

  // Ensure operand type legal or enable it legal.
  // Sub-128-bit inputs are widened until they form a legal LSX vector.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  // NumEles is designed for iterations count, v4i32 for LSX
  // and v8i32 for LASX should have the same count.
  // (A 256-bit vector is reduced per 128-bit half first, then the two
  // halves are combined below, so it needs one fewer halving step.)
  if (isLASX256Vector) {
    NumEles /= 2;
    LegalVecSize = 256;
  }

  // Each VHADDW step adds adjacent element pairs into elements of twice
  // the width, halving the number of live partial sums per iteration.
  for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
    MVT IntTy = MVT::getIntegerVT(EleBits);
    MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
    Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
  }

  // For LASX, fold the upper 128-bit half onto the lower half: XVPERMI
  // with immediate 2 brings the high half down, then an ADD combines them.
  if (isLASX256Vector) {
    SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
                              DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
    Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
  }

  // Reinterpret as a vector of result-typed elements and extract lane 0,
  // which now holds the full sum.
  Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
817
// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
// For Example:
// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
// can be lowered to:
// VBSRL_V vr1, vr0, 8
// VMAX_W vr0, vr1, vr0
// VBSRL_V vr1, vr0, 4
// VMAX_W vr0, vr1, vr0
// VPICKVE2GR_W a0, vr0, 0
// For 256 bit vector, it is illegal and will be spilt into
// two 128 bit vector by default then processed by this.
SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  // Element count and scalar width of the (possibly illegal) input.
  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();

  // Ensure operand type legal or enable it legal.
  // Sub-128-bit inputs are widened until they form a legal vector type.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  // Map e.g. VECREDUCE_SMAX back to the scalar/vector SMAX opcode that is
  // applied pairwise at each halving step.
  unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
  MVT VecTy = Val.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Log2(NumEles) rounds: shift the upper half of the live elements down
  // with VBSRL and combine with the lower half. The shift amount is in
  // bytes: i * EleBits / 16 == (i / 2 elements) * (EleBits / 8 bytes).
  for (int i = NumEles; i > 1; i /= 2) {
    SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
    SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
    Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
  }

  // Element 0 now holds the reduction result.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, GRLenVT));
}
857
858SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
859 SelectionDAG &DAG) const {
860 unsigned IsData = Op.getConstantOperandVal(4);
861
862 // We don't support non-data prefetch.
863 // Just preserve the chain.
864 if (!IsData)
865 return Op.getOperand(0);
866
867 return Op;
868}
869
870SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
871 SelectionDAG &DAG) const {
872 MVT VT = Op.getSimpleValueType();
873 assert(VT.isVector() && "Unexpected type");
874
875 SDLoc DL(Op);
876 SDValue R = Op.getOperand(0);
877 SDValue Amt = Op.getOperand(1);
878 unsigned Opcode = Op.getOpcode();
879 unsigned EltSizeInBits = VT.getScalarSizeInBits();
880
881 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
882 if (V.getOpcode() != ISD::BUILD_VECTOR)
883 return false;
884 if (SDValue SplatValue =
885 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
886 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
887 CstSplatValue = C->getAPIntValue();
888 return true;
889 }
890 }
891 return false;
892 };
893
894 // Check for constant splat rotation amount.
895 APInt CstSplatValue;
896 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
897 bool isROTL = Opcode == ISD::ROTL;
898
899 // Check for splat rotate by zero.
900 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
901 return R;
902
903 // LoongArch targets always prefer ISD::ROTR.
904 if (isROTL) {
905 SDValue Zero = DAG.getConstant(0, DL, VT);
906 return DAG.getNode(ISD::ROTR, DL, VT, R,
907 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
908 }
909
910 // Rotate by a immediate.
911 if (IsCstSplat) {
912 // ISD::ROTR: Attemp to rotate by a positive immediate.
913 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
914 if (SDValue Urem =
915 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
916 return DAG.getNode(Opcode, DL, VT, R, Urem);
917 }
918
919 return Op;
920}
921
922// Return true if Val is equal to (setcc LHS, RHS, CC).
923// Return false if Val is the inverse of (setcc LHS, RHS, CC).
924// Otherwise, return std::nullopt.
925static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
926 ISD::CondCode CC, SDValue Val) {
927 assert(Val->getOpcode() == ISD::SETCC);
928 SDValue LHS2 = Val.getOperand(0);
929 SDValue RHS2 = Val.getOperand(1);
930 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
931
932 if (LHS == LHS2 && RHS == RHS2) {
933 if (CC == CC2)
934 return true;
935 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
936 return false;
937 } else if (LHS == RHS2 && RHS == LHS2) {
939 if (CC == CC2)
940 return true;
941 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
942 return false;
943 }
944
945 return std::nullopt;
946}
947
949 const LoongArchSubtarget &Subtarget) {
950 SDValue CondV = N->getOperand(0);
951 SDValue TrueV = N->getOperand(1);
952 SDValue FalseV = N->getOperand(2);
953 MVT VT = N->getSimpleValueType(0);
954 SDLoc DL(N);
955
956 // (select c, -1, y) -> -c | y
957 if (isAllOnesConstant(TrueV)) {
958 SDValue Neg = DAG.getNegative(CondV, DL, VT);
959 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
960 }
961 // (select c, y, -1) -> (c-1) | y
962 if (isAllOnesConstant(FalseV)) {
963 SDValue Neg =
964 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
965 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
966 }
967
968 // (select c, 0, y) -> (c-1) & y
969 if (isNullConstant(TrueV)) {
970 SDValue Neg =
971 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
972 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
973 }
974 // (select c, y, 0) -> -c & y
975 if (isNullConstant(FalseV)) {
976 SDValue Neg = DAG.getNegative(CondV, DL, VT);
977 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
978 }
979
980 // select c, ~x, x --> xor -c, x
981 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
982 const APInt &TrueVal = TrueV->getAsAPIntVal();
983 const APInt &FalseVal = FalseV->getAsAPIntVal();
984 if (~TrueVal == FalseVal) {
985 SDValue Neg = DAG.getNegative(CondV, DL, VT);
986 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
987 }
988 }
989
990 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
991 // when both truev and falsev are also setcc.
992 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
993 FalseV.getOpcode() == ISD::SETCC) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 // (select x, x, y) -> x | y
999 // (select !x, x, y) -> x & y
1000 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1001 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1002 DAG.getFreeze(FalseV));
1003 }
1004 // (select x, y, x) -> x & y
1005 // (select !x, y, x) -> x | y
1006 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1007 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1008 DAG.getFreeze(TrueV), FalseV);
1009 }
1010 }
1011
1012 return SDValue();
1013}
1014
1015// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1016// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1017// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1018// being `0` or `-1`. In such cases we can replace `select` with `and`.
1019// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1020// than `c0`?
1021static SDValue
1023 const LoongArchSubtarget &Subtarget) {
1024 unsigned SelOpNo = 0;
1025 SDValue Sel = BO->getOperand(0);
1026 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1027 SelOpNo = 1;
1028 Sel = BO->getOperand(1);
1029 }
1030
1031 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1032 return SDValue();
1033
1034 unsigned ConstSelOpNo = 1;
1035 unsigned OtherSelOpNo = 2;
1036 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1037 ConstSelOpNo = 2;
1038 OtherSelOpNo = 1;
1039 }
1040 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1041 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1042 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1043 return SDValue();
1044
1045 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1046 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1047 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1048 return SDValue();
1049
1050 SDLoc DL(Sel);
1051 EVT VT = BO->getValueType(0);
1052
1053 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1054 if (SelOpNo == 1)
1055 std::swap(NewConstOps[0], NewConstOps[1]);
1056
1057 SDValue NewConstOp =
1058 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1059 if (!NewConstOp)
1060 return SDValue();
1061
1062 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1063 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1064 return SDValue();
1065
1066 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1067 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1068 if (SelOpNo == 1)
1069 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1070 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1071
1072 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1073 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1074 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1075}
1076
1077// Changes the condition code and swaps operands if necessary, so the SetCC
1078// operation matches one of the comparisons supported directly by branches
1079// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1080// compare with 1/-1.
1082 ISD::CondCode &CC, SelectionDAG &DAG) {
1083 // If this is a single bit test that can't be handled by ANDI, shift the
1084 // bit to be tested to the MSB and perform a signed compare with 0.
1085 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1086 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1087 isa<ConstantSDNode>(LHS.getOperand(1))) {
1088 uint64_t Mask = LHS.getConstantOperandVal(1);
1089 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1090 unsigned ShAmt = 0;
1091 if (isPowerOf2_64(Mask)) {
1092 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1093 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1094 } else {
1095 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1096 }
1097
1098 LHS = LHS.getOperand(0);
1099 if (ShAmt != 0)
1100 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1101 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1102 return;
1103 }
1104 }
1105
1106 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1107 int64_t C = RHSC->getSExtValue();
1108 switch (CC) {
1109 default:
1110 break;
1111 case ISD::SETGT:
1112 // Convert X > -1 to X >= 0.
1113 if (C == -1) {
1114 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1115 CC = ISD::SETGE;
1116 return;
1117 }
1118 break;
1119 case ISD::SETLT:
1120 // Convert X < 1 to 0 >= X.
1121 if (C == 1) {
1122 RHS = LHS;
1123 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1124 CC = ISD::SETGE;
1125 return;
1126 }
1127 break;
1128 }
1129 }
1130
1131 switch (CC) {
1132 default:
1133 break;
1134 case ISD::SETGT:
1135 case ISD::SETLE:
1136 case ISD::SETUGT:
1137 case ISD::SETULE:
1139 std::swap(LHS, RHS);
1140 break;
1141 }
1142}
1143
// Lower ISD::SELECT to LoongArchISD::SELECT_CC, first attempting a number
// of folds that turn the select into cheaper bitwise/arithmetic forms.
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Try replacing the whole select with and/or/xor/add-based sequences.
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If our single user is a speculatable binary op, try folding that op
  // into both arms of the select (profitable when one arm folds to 0/-1).
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // (setlt) ? c+1 : c -> c + (setlt); the compare result is 0 or 1.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    // (setlt) ? c-1 : c -> c - (setlt).
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1242
1243SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1244 SelectionDAG &DAG) const {
1245 SDValue CondV = Op.getOperand(1);
1246 SDLoc DL(Op);
1247 MVT GRLenVT = Subtarget.getGRLenVT();
1248
1249 if (CondV.getOpcode() == ISD::SETCC) {
1250 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1251 SDValue LHS = CondV.getOperand(0);
1252 SDValue RHS = CondV.getOperand(1);
1253 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1254
1255 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1256
1257 SDValue TargetCC = DAG.getCondCode(CCVal);
1258 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1259 Op.getOperand(0), LHS, RHS, TargetCC,
1260 Op.getOperand(2));
1261 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1262 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1263 Op.getOperand(0), CondV, Op.getOperand(2));
1264 }
1265 }
1266
1267 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1268 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1269 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1270}
1271
1272SDValue
1273LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1274 SelectionDAG &DAG) const {
1275 SDLoc DL(Op);
1276 MVT OpVT = Op.getSimpleValueType();
1277
1278 SDValue Vector = DAG.getUNDEF(OpVT);
1279 SDValue Val = Op.getOperand(0);
1280 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1281
1282 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1283}
1284
1285SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1286 SelectionDAG &DAG) const {
1287 EVT ResTy = Op->getValueType(0);
1288 SDValue Src = Op->getOperand(0);
1289 SDLoc DL(Op);
1290
1291 // LoongArchISD::BITREV_8B is not supported on LA32.
1292 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1293 return SDValue();
1294
1295 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1296 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1297 unsigned int NewEltNum = NewVT.getVectorNumElements();
1298
1299 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1300
1302 for (unsigned int i = 0; i < NewEltNum; i++) {
1303 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1304 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1305 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1306 ? (unsigned)LoongArchISD::BITREV_8B
1307 : (unsigned)ISD::BITREVERSE;
1308 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1309 }
1310 SDValue Res =
1311 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1312
1313 switch (ResTy.getSimpleVT().SimpleTy) {
1314 default:
1315 return SDValue();
1316 case MVT::v16i8:
1317 case MVT::v32i8:
1318 return Res;
1319 case MVT::v8i16:
1320 case MVT::v16i16:
1321 case MVT::v4i32:
1322 case MVT::v8i32: {
1324 for (unsigned int i = 0; i < NewEltNum; i++)
1325 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1326 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1327 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1328 }
1329 }
1330}
1331
// Widen element type to get a new mask value (if possible).
// For example:
//  shufflevector <4 x i32> %a, <4 x i32> %b,
//                <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
//  shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// can be lowered to:
//  VPACKOD_D vr0, vr0, vr1
                              SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  // 64-bit elements cannot be widened further; i1 masks are not handled.
  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    // Double the element width and halve the element count, preserving the
    // FP/integer distinction of the original type.
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      // Shuffle in the widened type, then cast back to the original type.
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}
1362
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vector shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
//
// Returns the shift amount (in the matched ShiftVT's element granularity,
// or in bytes for the VBSLL/VBSRL byte shifts) on success, -1 on failure.
// On success, ShiftVT and Opcode are set for the caller to emit the shift.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // Check that every element that gets 'shifted in' is zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // True iff Mask[Pos, Pos+Size) is undef or equals Low, Low+Step, ...
  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // Check that the surviving (non-shifted-in) elements form contiguous runs
  // and, on success, select the opcode and shift type. Returns -1 on
  // mismatch.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    // Shifts wider than 64 bits must be done as whole-lane byte shifts.
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    // Byte shifts take their amount in bytes rather than bits.
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // Try progressively larger element groups (up to a full 128-bit lane) and
  // every in-group shift amount, in both directions.
  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1432
/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 1, i32 2>
///   is lowered to:
///      (VBSLL_V $v0, $v0, 4)
///
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 4, i32 2>
///   is lowered to:
///      (VSLLI_D $v0, $v0, 32)
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  // Perform the shift in the type chosen by the matcher, then cast back.
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
  return DAG.getBitcast(VT, V);
}
1478
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
                    unsigned CheckStride,
                    ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  // Inspect every CheckStride-th element; each must be either undef (-1) or
  // the next value of the arithmetic sequence
  // ExpectedIndex, ExpectedIndex + ExpectedIndexStride, ...
  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}
1501
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero:
/// either an undef mask entry or an entry reading from an all-zeros input.
/// Results are reported via the KnownUndef / KnownZero out-parameters, one
/// bit per shuffle element.
                                      SDValue V2, APInt &KnownUndef,
                                      APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  // Look through bitcasts so an all-zeros build vector is recognized even
  // when viewed at a different element type.
  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits; // Only used by the assertion above.

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    // Undef mask entries are trivially zeroable.
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    // An element is known zero when it reads from an all-zeros input vector.
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}
1534
1535/// Test whether a shuffle mask is equivalent within each sub-lane.
1536///
1537/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1538/// non-trivial to compute in the face of undef lanes. The representation is
1539/// suitable for use with existing 128-bit shuffles as entries from the second
1540/// vector have been remapped to [LaneSize, 2*LaneSize).
1541static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1542 ArrayRef<int> Mask,
1543 SmallVectorImpl<int> &RepeatedMask) {
1544 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1545 RepeatedMask.assign(LaneSize, -1);
1546 int Size = Mask.size();
1547 for (int i = 0; i < Size; ++i) {
1548 assert(Mask[i] == -1 || Mask[i] >= 0);
1549 if (Mask[i] < 0)
1550 continue;
1551 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1552 // This entry crosses lanes, so there is no way to model this shuffle.
1553 return false;
1554
1555 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1556 // Adjust second vector indices to start at LaneSize instead of Size.
1557 int LocalM =
1558 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1559 if (RepeatedMask[i % LaneSize] < 0)
1560 // This is the first non-undef entry in this slot of a 128-bit lane.
1561 RepeatedMask[i % LaneSize] = LocalM;
1562 else if (RepeatedMask[i % LaneSize] != LocalM)
1563 // Found a mismatch with the repeated mask.
1564 return false;
1565 }
1566 return true;
1567}
1568
/// Attempts to match vector shuffle as byte rotation.
/// Returns the rotation amount in bytes (> 0) on success and -1 on failure;
/// on success V1/V2 are updated to the low/high rotation inputs.
                                     ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  // The rotation pattern must repeat identically in every 128-bit lane.
  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  // Bytes per element within a 128-bit lane.
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    // A zero start index would be an identity shuffle, not a rotation.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    // All defined entries must agree on one rotation amount.
    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  // Report the chosen inputs and convert the rotation to bytes.
  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}
1634
/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
///   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
///                            <2 x i32> <i32 3, i32 0>
///   is lowered to:
///      (VBSRL_V $v1, $v1, 8)
///      (VBSLL_V $v0, $v0, 8)
///      (VOR_V $v0, $V0, $v1)
static SDValue
                                SDValue V1, SDValue V2, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  // Perform the rotation on i8 lanes and cast back afterwards.
  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  // Compose the rotation from a left byte-shift of Lo and a right byte-shift
  // of Hi, OR'ed together.
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, GRLenVT));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, GRLenVT));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}
1668
/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 4>
///   %3 = bitcast <4 x i32> %2 to <2 x i64>
///   is lowered to:
///      (VREPLI $v1, 0)
///      (VILVL $v0, $v1, $v0)
                                                 ArrayRef<int> Mask, MVT VT,
                                                 SDValue V1, SDValue V2,
                                                 SelectionDAG &DAG,
                                                 const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  // A fully zeroable shuffle is just a zero vector.
  if (Zeroable.isAllOnes())
    return DAG.getConstant(0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it if
  // valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements need to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        // A defined high part means this must be a zero-extend, not an
        // any-extend.
        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // These offsets can't be handled.
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    // Emit a chain of interleaves, doubling the element width each step
    // until the requested extension is reached.
    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      // Take the interleave-high form when the base elements come from the
      // upper half of the input.
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
      // Interleave with zeros for zero-extend; for any-extend the high half
      // is a don't-care, so a frozen copy of the input is used instead.
      SDValue Ext =
          AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
      InputV = DAG.getBitcast(InputVT, InputV);
      InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}
1760
/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue
                             SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  // The first defined element determines the splat index.
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  // An all-undef mask has no defined result.
  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  // Every element must equal the splat index (stride 0) or be undef.
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
  }

  return SDValue();
}
1793
/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
///   is lowered to:
///      (VSHUF4I_H $v0, $v1, 27)
///   where the 27 comes from:
///      3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue
                            SDValue V1, SDValue V2, SelectionDAG &DAG,
                            const LoongArchSubtarget &Subtarget) {

  // v2i64/v2f64 use the two-element variant (vshuf4i.d); all other types
  // shuffle within groups of four elements.
  unsigned SubVecSize = 4;
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    SubVecSize = 2;

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < SubVecSize; ++i) {
    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
      int M = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (M != -1) {
        M -= 4 * (j / SubVecSize);
        if (M < 0 || M >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = M;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (M != -1 && M != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  int Imm = 0;
  for (int i = SubVecSize - 1; i >= 0; --i) {
    int M = SubMask[i];

    if (M == -1)
      M = 0;

    // Each slot contributes two bits, lowest-indexed slot in the lowest bits.
    Imm <<= 2;
    Imm |= M & 0x3;
  }

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return vshuf4i.d
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
                       DAG.getConstant(Imm, DL, GRLenVT));

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, GRLenVT));
}
1867
/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
///
/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
/// reverse whose mask likes:
///   <7, 6, 5, 4, 3, 2, 1, 0>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue
                              SDValue V1, SelectionDAG &DAG,
                              const LoongArchSubtarget &Subtarget) {
  // Only vectors with i8/i16 elements which cannot match other patterns
  // directly needs to do this.
  if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
      VT != MVT::v16i16)
    return SDValue();

  if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
    return SDValue();

  // First reverse the vector at the granularity of 4-element groups via a
  // widened shuffle (i32 groups for i8 elements, i64 groups for i16)...
  int WidenNumElts = VT.getVectorNumElements() / 4;
  SmallVector<int, 16> WidenMask(WidenNumElts, -1);
  for (int i = 0; i < WidenNumElts; ++i)
    WidenMask[i] = WidenNumElts - 1 - i;

  MVT WidenVT = MVT::getVectorVT(
      VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
  SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
  SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
                                          DAG.getUNDEF(WidenVT), WidenMask);

  // ...then reverse the four elements within each group; immediate 27
  // (0b00011011) encodes the <3, 2, 1, 0> pattern.
  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
                     DAG.getBitcast(VT, WidenRev),
                     DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
}
1904
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must take the even elements of one input...
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  // ...and odd result positions the even elements of one input.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}
1944
/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must take the odd elements of one input...
  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  // ...and odd result positions the odd elements of one input.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}
1984
/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must walk the high half of one input...
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  // ...and odd result positions the high half of one input.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}
2027
/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must walk the low half of one input...
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  // ...and odd result positions the low half of one input.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}
2068
/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // The low half of the result must be the even elements of one input...
  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  // ...and the high half the even elements of one input.
  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;

  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}
2110
/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // The low half of the result must be the odd elements of one input...
  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  // ...and the high half the odd elements of one input.
  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}
2151
/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget) {

  // Materialize the mask as a build_vector of GRLen-wide constants.
  for (auto M : Mask)
    Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}
2177
2178/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2179///
2180/// This routine breaks down the specific type of 128-bit shuffle and
2181/// dispatches to the lowering routines accordingly.
2183 SDValue V1, SDValue V2, SelectionDAG &DAG,
2184 const LoongArchSubtarget &Subtarget) {
2185 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2186 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2187 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2188 "Vector type is unsupported for lsx!");
2190 "Two operands have different types!");
2191 assert(VT.getVectorNumElements() == Mask.size() &&
2192 "Unexpected mask size for shuffle!");
2193 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2194
2195 APInt KnownUndef, KnownZero;
2196 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2197 APInt Zeroable = KnownUndef | KnownZero;
2198
2199 SDValue Result;
2200 // TODO: Add more comparison patterns.
2201 if (V2.isUndef()) {
2202 if ((Result =
2203 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2204 return Result;
2205 if ((Result =
2206 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2207 return Result;
2208 if ((Result =
2209 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2210 return Result;
2211
2212 // TODO: This comment may be enabled in the future to better match the
2213 // pattern for instruction selection.
2214 /* V2 = V1; */
2215 }
2216
2217 // It is recommended not to change the pattern comparison order for better
2218 // performance.
2219 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2220 return Result;
2221 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2222 return Result;
2223 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2224 return Result;
2225 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2226 return Result;
2227 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2228 return Result;
2229 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2230 return Result;
2231 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2232 (Result =
2233 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2234 return Result;
2235 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2236 Zeroable)))
2237 return Result;
2238 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2239 Zeroable)))
2240 return Result;
2241 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2242 Subtarget)))
2243 return Result;
2244 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2245 return NewShuffle;
2246 if ((Result =
2247 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2248 return Result;
2249 return SDValue();
2250}
2251
2252/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2253///
2254/// It is a XVREPLVEI when the mask is:
2255/// <x, x, x, ..., x+n, x+n, x+n, ...>
2256/// where the number of x is equal to n and n is half the length of vector.
2257///
2258/// When undef's appear in the mask they are treated as if they were whatever
2259/// value is necessary in order to fit the above form.
2260static SDValue
2262 SDValue V1, SelectionDAG &DAG,
2263 const LoongArchSubtarget &Subtarget) {
2264 int SplatIndex = -1;
2265 for (const auto &M : Mask) {
2266 if (M != -1) {
2267 SplatIndex = M;
2268 break;
2269 }
2270 }
2271
2272 if (SplatIndex == -1)
2273 return DAG.getUNDEF(VT);
2274
2275 const auto &Begin = Mask.begin();
2276 const auto &End = Mask.end();
2277 int HalfSize = Mask.size() / 2;
2278
2279 if (SplatIndex >= HalfSize)
2280 return SDValue();
2281
2282 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2283 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2284 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2285 0)) {
2286 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2287 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2288 }
2289
2290 return SDValue();
2291}
2292
2293/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2294static SDValue
2296 SDValue V1, SDValue V2, SelectionDAG &DAG,
2297 const LoongArchSubtarget &Subtarget) {
2298 // When the size is less than or equal to 4, lower cost instructions may be
2299 // used.
2300 if (Mask.size() <= 4)
2301 return SDValue();
2302 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2303}
2304
2305/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2306static SDValue
2308 SDValue V1, SelectionDAG &DAG,
2309 const LoongArchSubtarget &Subtarget) {
2310 // Only consider XVPERMI_D.
2311 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2312 return SDValue();
2313
2314 unsigned MaskImm = 0;
2315 for (unsigned i = 0; i < Mask.size(); ++i) {
2316 if (Mask[i] == -1)
2317 continue;
2318 MaskImm |= Mask[i] << (i * 2);
2319 }
2320
2321 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2322 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2323}
2324
2325/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2327 MVT VT, SDValue V1, SelectionDAG &DAG,
2328 const LoongArchSubtarget &Subtarget) {
2329 // LoongArch LASX only have XVPERM_W.
2330 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2331 return SDValue();
2332
2333 unsigned NumElts = VT.getVectorNumElements();
2334 unsigned HalfSize = NumElts / 2;
2335 bool FrontLo = true, FrontHi = true;
2336 bool BackLo = true, BackHi = true;
2337
2338 auto inRange = [](int val, int low, int high) {
2339 return (val == -1) || (val >= low && val < high);
2340 };
2341
2342 for (unsigned i = 0; i < HalfSize; ++i) {
2343 int Fronti = Mask[i];
2344 int Backi = Mask[i + HalfSize];
2345
2346 FrontLo &= inRange(Fronti, 0, HalfSize);
2347 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2348 BackLo &= inRange(Backi, 0, HalfSize);
2349 BackHi &= inRange(Backi, HalfSize, NumElts);
2350 }
2351
2352 // If both the lower and upper 128-bit parts access only one half of the
2353 // vector (either lower or upper), avoid using xvperm.w. The latency of
2354 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2355 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2356 return SDValue();
2357
2359 MVT GRLenVT = Subtarget.getGRLenVT();
2360 for (unsigned i = 0; i < NumElts; ++i)
2361 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2362 : DAG.getConstant(Mask[i], DL, GRLenVT));
2363 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2364
2365 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2366}
2367
2368/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2370 MVT VT, SDValue V1, SDValue V2,
2371 SelectionDAG &DAG) {
2372 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2373}
2374
2375/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2377 MVT VT, SDValue V1, SDValue V2,
2378 SelectionDAG &DAG) {
2379 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2380}
2381
2382/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2384 MVT VT, SDValue V1, SDValue V2,
2385 SelectionDAG &DAG) {
2386
2387 const auto &Begin = Mask.begin();
2388 const auto &End = Mask.end();
2389 unsigned HalfSize = Mask.size() / 2;
2390 unsigned LeftSize = HalfSize / 2;
2391 SDValue OriV1 = V1, OriV2 = V2;
2392
2393 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2394 1) &&
2395 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2396 V1 = OriV1;
2397 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2398 Mask.size() + HalfSize - LeftSize, 1) &&
2399 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2400 Mask.size() + HalfSize + LeftSize, 1))
2401 V1 = OriV2;
2402 else
2403 return SDValue();
2404
2405 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2406 1) &&
2407 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2408 1))
2409 V2 = OriV1;
2410 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2411 Mask.size() + HalfSize - LeftSize, 1) &&
2412 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2413 Mask.size() + HalfSize + LeftSize, 1))
2414 V2 = OriV2;
2415 else
2416 return SDValue();
2417
2418 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2419}
2420
2421/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2423 MVT VT, SDValue V1, SDValue V2,
2424 SelectionDAG &DAG) {
2425
2426 const auto &Begin = Mask.begin();
2427 const auto &End = Mask.end();
2428 unsigned HalfSize = Mask.size() / 2;
2429 SDValue OriV1 = V1, OriV2 = V2;
2430
2431 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2432 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2433 V1 = OriV1;
2434 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2435 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2436 Mask.size() + HalfSize, 1))
2437 V1 = OriV2;
2438 else
2439 return SDValue();
2440
2441 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2442 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2443 V2 = OriV1;
2444 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2445 1) &&
2446 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2447 Mask.size() + HalfSize, 1))
2448 V2 = OriV2;
2449 else
2450 return SDValue();
2451
2452 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2453}
2454
2455/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2457 MVT VT, SDValue V1, SDValue V2,
2458 SelectionDAG &DAG) {
2459
2460 const auto &Begin = Mask.begin();
2461 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2462 const auto &Mid = Mask.begin() + Mask.size() / 2;
2463 const auto &RightMid = Mask.end() - Mask.size() / 4;
2464 const auto &End = Mask.end();
2465 unsigned HalfSize = Mask.size() / 2;
2466 SDValue OriV1 = V1, OriV2 = V2;
2467
2468 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2469 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2470 V1 = OriV1;
2471 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2472 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2473 V1 = OriV2;
2474 else
2475 return SDValue();
2476
2477 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2478 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2479 V2 = OriV1;
2480 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2481 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2482 V2 = OriV2;
2483
2484 else
2485 return SDValue();
2486
2487 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2488}
2489
2490/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2492 MVT VT, SDValue V1, SDValue V2,
2493 SelectionDAG &DAG) {
2494
2495 const auto &Begin = Mask.begin();
2496 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2497 const auto &Mid = Mask.begin() + Mask.size() / 2;
2498 const auto &RightMid = Mask.end() - Mask.size() / 4;
2499 const auto &End = Mask.end();
2500 unsigned HalfSize = Mask.size() / 2;
2501 SDValue OriV1 = V1, OriV2 = V2;
2502
2503 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2504 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2505 V1 = OriV1;
2506 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2507 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2508 2))
2509 V1 = OriV2;
2510 else
2511 return SDValue();
2512
2513 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2514 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2515 V2 = OriV1;
2516 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2517 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2518 2))
2519 V2 = OriV2;
2520 else
2521 return SDValue();
2522
2523 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2524}
2525
2526/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2527static SDValue
2529 SDValue V1, SDValue V2, SelectionDAG &DAG,
2530 const LoongArchSubtarget &Subtarget) {
2531 // LoongArch LASX only supports xvinsve0.{w/d}.
2532 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2533 VT != MVT::v4f64)
2534 return SDValue();
2535
2536 MVT GRLenVT = Subtarget.getGRLenVT();
2537 int MaskSize = Mask.size();
2538 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2539
2540 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2541 // all other elements are either 'Base + i' or undef (-1). On success, return
2542 // the index of the replaced element. Otherwise, just return -1.
2543 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2544 int Idx = -1;
2545 for (int i = 0; i < MaskSize; ++i) {
2546 if (Mask[i] == Base + i || Mask[i] == -1)
2547 continue;
2548 if (Mask[i] != Replaced)
2549 return -1;
2550 if (Idx == -1)
2551 Idx = i;
2552 else
2553 return -1;
2554 }
2555 return Idx;
2556 };
2557
2558 // Case 1: the lowest element of V2 replaces one element in V1.
2559 int Idx = checkReplaceOne(0, MaskSize);
2560 if (Idx != -1)
2561 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2562 DAG.getConstant(Idx, DL, GRLenVT));
2563
2564 // Case 2: the lowest element of V1 replaces one element in V2.
2565 Idx = checkReplaceOne(MaskSize, 0);
2566 if (Idx != -1)
2567 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2568 DAG.getConstant(Idx, DL, GRLenVT));
2569
2570 return SDValue();
2571}
2572
2573/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2575 MVT VT, SDValue V1, SDValue V2,
2576 SelectionDAG &DAG) {
2577
2578 int MaskSize = Mask.size();
2579 int HalfSize = Mask.size() / 2;
2580 const auto &Begin = Mask.begin();
2581 const auto &Mid = Mask.begin() + HalfSize;
2582 const auto &End = Mask.end();
2583
2584 // VECTOR_SHUFFLE concatenates the vectors:
2585 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2586 // shuffling ->
2587 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2588 //
2589 // XVSHUF concatenates the vectors:
2590 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2591 // shuffling ->
2592 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2593 SmallVector<SDValue, 8> MaskAlloc;
2594 for (auto it = Begin; it < Mid; it++) {
2595 if (*it < 0) // UNDEF
2596 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2597 else if ((*it >= 0 && *it < HalfSize) ||
2598 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2599 int M = *it < HalfSize ? *it : *it - HalfSize;
2600 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2601 } else
2602 return SDValue();
2603 }
2604 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2605
2606 for (auto it = Mid; it < End; it++) {
2607 if (*it < 0) // UNDEF
2608 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2609 else if ((*it >= HalfSize && *it < MaskSize) ||
2610 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2611 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2612 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2613 } else
2614 return SDValue();
2615 }
2616 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2617
2618 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2619 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2620 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2621}
2622
2623/// Shuffle vectors by lane to generate more optimized instructions.
2624/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2625///
2626/// Therefore, except for the following four cases, other cases are regarded
2627/// as cross-lane shuffles, where optimization is relatively limited.
2628///
2629/// - Shuffle high, low lanes of two inputs vector
2630/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2631/// - Shuffle low, high lanes of two inputs vector
2632/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2633/// - Shuffle low, low lanes of two inputs vector
2634/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2635/// - Shuffle high, high lanes of two inputs vector
2636/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2637///
2638/// The first case is the closest to LoongArch instructions and the other
2639/// cases need to be converted to it for processing.
2640///
2641/// This function will return true for the last three cases above and will
2642/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2643/// cross-lane shuffle cases.
2645 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2646 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2647
2648 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2649
2650 int MaskSize = Mask.size();
2651 int HalfSize = Mask.size() / 2;
2652 MVT GRLenVT = Subtarget.getGRLenVT();
2653
2654 HalfMaskType preMask = None, postMask = None;
2655
2656 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2657 return M < 0 || (M >= 0 && M < HalfSize) ||
2658 (M >= MaskSize && M < MaskSize + HalfSize);
2659 }))
2660 preMask = HighLaneTy;
2661 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2662 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2663 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2664 }))
2665 preMask = LowLaneTy;
2666
2667 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2668 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2669 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2670 }))
2671 postMask = LowLaneTy;
2672 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2673 return M < 0 || (M >= 0 && M < HalfSize) ||
2674 (M >= MaskSize && M < MaskSize + HalfSize);
2675 }))
2676 postMask = HighLaneTy;
2677
2678 // The pre-half of mask is high lane type, and the post-half of mask
2679 // is low lane type, which is closest to the LoongArch instructions.
2680 //
2681 // Note: In the LoongArch architecture, the high lane of mask corresponds
2682 // to the lower 128-bit of vector register, and the low lane of mask
2683 // corresponds the higher 128-bit of vector register.
2684 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2685 return false;
2686 }
2687 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2688 V1 = DAG.getBitcast(MVT::v4i64, V1);
2689 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2690 DAG.getConstant(0b01001110, DL, GRLenVT));
2691 V1 = DAG.getBitcast(VT, V1);
2692
2693 if (!V2.isUndef()) {
2694 V2 = DAG.getBitcast(MVT::v4i64, V2);
2695 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2696 DAG.getConstant(0b01001110, DL, GRLenVT));
2697 V2 = DAG.getBitcast(VT, V2);
2698 }
2699
2700 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2701 *it = *it < 0 ? *it : *it - HalfSize;
2702 }
2703 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2704 *it = *it < 0 ? *it : *it + HalfSize;
2705 }
2706 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2707 V1 = DAG.getBitcast(MVT::v4i64, V1);
2708 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2709 DAG.getConstant(0b11101110, DL, GRLenVT));
2710 V1 = DAG.getBitcast(VT, V1);
2711
2712 if (!V2.isUndef()) {
2713 V2 = DAG.getBitcast(MVT::v4i64, V2);
2714 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2715 DAG.getConstant(0b11101110, DL, GRLenVT));
2716 V2 = DAG.getBitcast(VT, V2);
2717 }
2718
2719 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2720 *it = *it < 0 ? *it : *it - HalfSize;
2721 }
2722 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2723 V1 = DAG.getBitcast(MVT::v4i64, V1);
2724 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2725 DAG.getConstant(0b01000100, DL, GRLenVT));
2726 V1 = DAG.getBitcast(VT, V1);
2727
2728 if (!V2.isUndef()) {
2729 V2 = DAG.getBitcast(MVT::v4i64, V2);
2730 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2731 DAG.getConstant(0b01000100, DL, GRLenVT));
2732 V2 = DAG.getBitcast(VT, V2);
2733 }
2734
2735 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2736 *it = *it < 0 ? *it : *it + HalfSize;
2737 }
2738 } else { // cross-lane
2739 return false;
2740 }
2741
2742 return true;
2743}
2744
2745/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2746/// Only for 256-bit vector.
2747///
2748/// For example:
2749/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2750/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2751/// is lowerded to:
2752/// (XVPERMI $xr2, $xr0, 78)
2753/// (XVSHUF $xr1, $xr2, $xr0)
2754/// (XVORI $xr0, $xr1, 0)
2756 ArrayRef<int> Mask,
2757 MVT VT, SDValue V1,
2758 SDValue V2,
2759 SelectionDAG &DAG) {
2760 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2761 int Size = Mask.size();
2762 int LaneSize = Size / 2;
2763
2764 bool LaneCrossing[2] = {false, false};
2765 for (int i = 0; i < Size; ++i)
2766 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2767 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2768
2769 // Ensure that all lanes ared involved.
2770 if (!LaneCrossing[0] && !LaneCrossing[1])
2771 return SDValue();
2772
2773 SmallVector<int> InLaneMask;
2774 InLaneMask.assign(Mask.begin(), Mask.end());
2775 for (int i = 0; i < Size; ++i) {
2776 int &M = InLaneMask[i];
2777 if (M < 0)
2778 continue;
2779 if (((M % Size) / LaneSize) != (i / LaneSize))
2780 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2781 }
2782
2783 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2784 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2785 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2786 Flipped = DAG.getBitcast(VT, Flipped);
2787 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2788}
2789
2790/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2791///
2792/// This routine breaks down the specific type of 256-bit shuffle and
2793/// dispatches to the lowering routines accordingly.
2795 SDValue V1, SDValue V2, SelectionDAG &DAG,
2796 const LoongArchSubtarget &Subtarget) {
2797 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2798 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2799 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2800 "Vector type is unsupported for lasx!");
2802 "Two operands have different types!");
2803 assert(VT.getVectorNumElements() == Mask.size() &&
2804 "Unexpected mask size for shuffle!");
2805 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2806 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2807
2808 APInt KnownUndef, KnownZero;
2809 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2810 APInt Zeroable = KnownUndef | KnownZero;
2811
2812 SDValue Result;
2813 // TODO: Add more comparison patterns.
2814 if (V2.isUndef()) {
2815 if ((Result =
2816 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2817 return Result;
2818 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2819 Subtarget)))
2820 return Result;
2821 // Try to widen vectors to gain more optimization opportunities.
2822 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2823 return NewShuffle;
2824 if ((Result =
2825 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2826 return Result;
2827 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2828 return Result;
2829 if ((Result =
2830 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2831 return Result;
2832
2833 // TODO: This comment may be enabled in the future to better match the
2834 // pattern for instruction selection.
2835 /* V2 = V1; */
2836 }
2837
2838 // It is recommended not to change the pattern comparison order for better
2839 // performance.
2840 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2841 return Result;
2842 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2843 return Result;
2844 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2845 return Result;
2846 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2847 return Result;
2848 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2849 return Result;
2850 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2851 return Result;
2852 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2853 Zeroable)))
2854 return Result;
2855 if ((Result =
2856 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2857 return Result;
2858 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2859 Subtarget)))
2860 return Result;
2861
2862 // canonicalize non cross-lane shuffle vector
2863 SmallVector<int> NewMask(Mask);
2864 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2865 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2866
2867 // FIXME: Handling the remaining cases earlier can degrade performance
2868 // in some situations. Further analysis is required to enable more
2869 // effective optimizations.
2870 if (V2.isUndef()) {
2871 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2872 V1, V2, DAG)))
2873 return Result;
2874 }
2875
2876 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2877 return NewShuffle;
2878 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2879 return Result;
2880
2881 return SDValue();
2882}
2883
2884SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2885 SelectionDAG &DAG) const {
2886 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2887 ArrayRef<int> OrigMask = SVOp->getMask();
2888 SDValue V1 = Op.getOperand(0);
2889 SDValue V2 = Op.getOperand(1);
2890 MVT VT = Op.getSimpleValueType();
2891 int NumElements = VT.getVectorNumElements();
2892 SDLoc DL(Op);
2893
2894 bool V1IsUndef = V1.isUndef();
2895 bool V2IsUndef = V2.isUndef();
2896 if (V1IsUndef && V2IsUndef)
2897 return DAG.getUNDEF(VT);
2898
2899 // When we create a shuffle node we put the UNDEF node to second operand,
2900 // but in some cases the first operand may be transformed to UNDEF.
2901 // In this case we should just commute the node.
2902 if (V1IsUndef)
2903 return DAG.getCommutedVectorShuffle(*SVOp);
2904
2905 // Check for non-undef masks pointing at an undef vector and make the masks
2906 // undef as well. This makes it easier to match the shuffle based solely on
2907 // the mask.
2908 if (V2IsUndef &&
2909 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2910 SmallVector<int, 8> NewMask(OrigMask);
2911 for (int &M : NewMask)
2912 if (M >= NumElements)
2913 M = -1;
2914 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2915 }
2916
2917 // Check for illegal shuffle mask element index values.
2918 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2919 (void)MaskUpperLimit;
2920 assert(llvm::all_of(OrigMask,
2921 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2922 "Out of bounds shuffle index");
2923
2924 // For each vector width, delegate to a specialized lowering routine.
2925 if (VT.is128BitVector())
2926 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2927
2928 if (VT.is256BitVector())
2929 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2930
2931 return SDValue();
2932}
2933
2934SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2935 SelectionDAG &DAG) const {
2936 // Custom lower to ensure the libcall return is passed in an FPR on hard
2937 // float ABIs.
2938 SDLoc DL(Op);
2939 MakeLibCallOptions CallOptions;
2940 SDValue Op0 = Op.getOperand(0);
2941 SDValue Chain = SDValue();
2942 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2943 SDValue Res;
2944 std::tie(Res, Chain) =
2945 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2946 if (Subtarget.is64Bit())
2947 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2948 return DAG.getBitcast(MVT::i32, Res);
2949}
2950
2951SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2952 SelectionDAG &DAG) const {
2953 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2954 // float ABIs.
2955 SDLoc DL(Op);
2956 MakeLibCallOptions CallOptions;
2957 SDValue Op0 = Op.getOperand(0);
2958 SDValue Chain = SDValue();
2959 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2960 DL, MVT::f32, Op0)
2961 : DAG.getBitcast(MVT::f32, Op0);
2962 SDValue Res;
2963 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2964 CallOptions, DL, Chain);
2965 return Res;
2966}
2967
2968SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2969 SelectionDAG &DAG) const {
2970 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2971 SDLoc DL(Op);
2972 MakeLibCallOptions CallOptions;
2973 RTLIB::Libcall LC =
2974 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2975 SDValue Res =
2976 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2977 if (Subtarget.is64Bit())
2978 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2979 return DAG.getBitcast(MVT::i32, Res);
2980}
2981
2982SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2983 SelectionDAG &DAG) const {
2984 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2985 MVT VT = Op.getSimpleValueType();
2986 SDLoc DL(Op);
2987 Op = DAG.getNode(
2988 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2989 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2990 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2991 DL, MVT::f32, Op)
2992 : DAG.getBitcast(MVT::f32, Op);
2993 if (VT != MVT::f32)
2994 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2995 return Res;
2996}
2997
2998// Lower BUILD_VECTOR as broadcast load (if possible).
2999// For example:
3000// %a = load i8, ptr %ptr
3001// %b = build_vector %a, %a, %a, %a
3002// is lowered to :
3003// (VLDREPL_B $a0, 0)
3005 const SDLoc &DL,
3006 SelectionDAG &DAG) {
3007 MVT VT = BVOp->getSimpleValueType(0);
3008 int NumOps = BVOp->getNumOperands();
3009
3010 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3011 "Unsupported vector type for broadcast.");
3012
3013 SDValue IdentitySrc;
3014 bool IsIdeneity = true;
3015
3016 for (int i = 0; i != NumOps; i++) {
3017 SDValue Op = BVOp->getOperand(i);
3018 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3019 IsIdeneity = false;
3020 break;
3021 }
3022 IdentitySrc = BVOp->getOperand(0);
3023 }
3024
3025 // make sure that this load is valid and only has one user.
3026 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3027 return SDValue();
3028
3029 auto *LN = cast<LoadSDNode>(IdentitySrc);
3030 auto ExtType = LN->getExtensionType();
3031
3032 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3033 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3034 // Indexed loads and stores are not supported on LoongArch.
3035 assert(LN->isUnindexed() && "Unexpected indexed load.");
3036
3037 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3038 // The offset operand of unindexed load is always undefined, so there is
3039 // no need to pass it to VLDREPL.
3040 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3041 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3042 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3043 return BCast;
3044 }
3045 return SDValue();
3046}
3047
3048// Sequentially insert elements from Ops into Vector, from low to high indices.
3049// Note: Ops can have fewer elements than Vector.
3051 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3052 EVT ResTy) {
3053 assert(Ops.size() <= ResTy.getVectorNumElements());
3054
3055 SDValue Op0 = Ops[0];
3056 if (!Op0.isUndef())
3057 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3058 for (unsigned i = 1; i < Ops.size(); ++i) {
3059 SDValue Opi = Ops[i];
3060 if (Opi.isUndef())
3061 continue;
3062 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3063 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3064 }
3065}
3066
// Build a ResTy subvector from Node, taking NumElts elements starting at index
// 'first'.
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
                                            SelectionDAG &DAG, SDLoc DL,
                                            const LoongArchSubtarget &Subtarget,
                                            EVT ResTy, unsigned first) {
  unsigned NumElts = ResTy.getVectorNumElements();

  // The requested slice must lie entirely within the source BUILD_VECTOR.
  assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());

  // Copy the desired operand slice, then insert the elements one by one into
  // an undef vector of the result type.
  SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
                               Node->op_begin() + first + NumElts);
  SDValue Vector = DAG.getUNDEF(ResTy);
  fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
  return Vector;
}
3083
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  MVT VT = Node->getSimpleValueType(0);
  EVT ResTy = Op->getValueType(0);
  unsigned NumElts = ResTy.getVectorNumElements();
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsConstant = false;
  bool UseSameConstant = true;
  SDValue ConstantValue;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  // Only handle vector widths backed by an available extension: 128-bit
  // vectors need LSX, 256-bit vectors need LASX.
  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  // Prefer a single load-and-broadcast when all elements come from one load.
  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
    return Result;

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
      // We can only handle 64-bit elements that are within
      // the signed 10-bit range or match vldi patterns on 32-bit targets.
      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
      if (!SplatValue.isSignedIntN(10) &&
          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
        return SDValue();
      if ((Is128Vec && ResTy == MVT::v4i32) ||
          (Is256Vec && ResTy == MVT::v8i32))
        return Op;
    }

    EVT ViaVecTy;

    // Pick the integer vector type whose element width matches the splat;
    // the result is bitcast back to the requested type below.
    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  // Non-constant splats are matched by target splat/replicate patterns.
  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  // Record whether any lane is an int/FP constant and whether all constant
  // lanes share one and the same value.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Opi = Node->getOperand(i);
    if (isIntOrFPConstant(Opi)) {
      IsConstant = true;
      if (!ConstantValue.getNode())
        ConstantValue = Opi;
      else if (ConstantValue != Opi)
        UseSameConstant = false;
    }
  }

  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the common constant, then patch in the non-constant lanes.
    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Opi = Node->getOperand(i);
      if (!isIntOrFPConstant(Opi))
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Result;
  }

  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
    //
    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
    // generates worse code in some cases. This could be further optimized
    // with more consideration.
    SmallVector<SDValue, 16> Sequence;
    BitVector UndefElements;
    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
        UndefElements.count() == 0) {
      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
      SDValue Vector = DAG.getUNDEF(ResTy);
      EVT FillTy = Is256Vec
                       ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
                       : ResTy;
      SDValue FillVec =
          Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;

      fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);

      unsigned SeqLen = Sequence.size();
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);

      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

      SDValue SplatVec;
      SDValue SrcVec = DAG.getBitcast(
          SplatTy,
          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
      if (Is256Vec) {
        SplatVec =
            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                  : LoongArchISD::XVREPLVE0,
                        DL, SplatTy, SrcVec);
      } else {
        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(ResTy, SplatVec);
    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
    // using memory operations is much lower.
    //
    // For 256-bit vectors, normally split into two halves and concatenate.
    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
    // one non-undef element, skip spliting to avoid a worse result.
    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
        ResTy == MVT::v4f64) {
      unsigned NonUndefCount = 0;
      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
        if (!Node->getOperand(i).isUndef()) {
          ++NonUndefCount;
          if (NonUndefCount > 1)
            break;
        }
      }
      if (NonUndefCount == 1)
        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
    }

    // Build each half (or the whole 128-bit vector) by element insertion.
    EVT VecTy =
        Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
    SDValue Vector =
        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);

    if (Is128Vec)
      return Vector;

    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
                                                    VecTy, NumElts / 2);

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
  }

  return SDValue();
}
3268
// Lower a 256-bit CONCAT_VECTORS of two 128-bit subvectors by classifying
// each operand (undef / freeze(undef) / all-zeros / other) and then either
// recursing on the halves or inserting the non-zero subvectors into a
// suitable base vector.
SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT ResVT = Op.getSimpleValueType();
  assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);

  unsigned NumOperands = Op.getNumOperands();
  unsigned NumFreezeUndef = 0;
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0; // Bitmask of operand indices that are "non-zero".
  SmallSet<SDValue, 4> Undefs;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    if (ISD::isFreezeUndef(SubVec.getNode())) {
      // If the freeze(undef) has multiple uses then we must fold to zero.
      if (SubVec.hasOneUse()) {
        ++NumFreezeUndef;
      } else {
        ++NumZero;
        Undefs.insert(SubVec);
      }
    } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
      ++NumZero;
    else {
      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
      NonZeros |= 1 << i;
      ++NumNonZero;
    }
  }

  // If we have more than 2 non-zeros, build each half separately.
  if (NumNonZero > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
                             Ops.slice(0, NumOperands / 2));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
                             Ops.slice(NumOperands / 2));
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
  }

  // Otherwise, build it up through insert_subvectors.
  // Base vector: zeros if any operand was zero, freeze(undef) if any operand
  // was a single-use freeze(undef), plain undef otherwise.
  SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
                        : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
                                          : DAG.getUNDEF(ResVT));

  // Replace Undef operands with ZeroVector.
  for (SDValue U : Undefs)
    DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));

  MVT SubVT = Op.getOperand(0).getSimpleValueType();
  unsigned NumSubElems = SubVT.getVectorNumElements();
  for (unsigned i = 0; i != NumOperands; ++i) {
    if ((NonZeros & (1 << i)) == 0)
      continue;

    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
                      DAG.getVectorIdxConstant(i * NumSubElems, DL));
  }

  return Vec;
}
3334
// Lower EXTRACT_VECTOR_ELT with a non-constant index on 256-bit vectors.
// Constant indices are matched directly by patterns; variable indices are
// reduced to extracting lane 0 of a vector that has been shuffled/permuted
// so the desired element sits in the lowest lane.
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MVT EltVT = Op.getSimpleValueType();
  SDValue Vec = Op->getOperand(0);
  EVT VecTy = Vec->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

  // Constant indices are handled by tablegen patterns.
  if (isa<ConstantSDNode>(Idx))
    return Op;

  switch (VecTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected type");
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v4i64:
  case MVT::v4f64: {
    // Extract the high half subvector and place it to the low half of a new
    // vector. It doesn't matter what the high half of the new vector is.
    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
    SDValue VecHi =
        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
    SDValue TmpVec =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
                    VecHi, DAG.getConstant(0, DL, GRLenVT));

    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
    // desired element.
    SDValue IdxCp =
        Subtarget.is64Bit()
            ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
            : DAG.getBitcast(MVT::f32, Idx);
    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
    SDValue MaskVec =
        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
    SDValue ResVec =
        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    // XVPERM can move any 32-bit lane to lane 0 directly: splat the index
    // and permute, then extract lane 0.
    SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
    SDValue SplatValue =
        DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  }
}
3393
// Lower INSERT_VECTOR_ELT with a non-constant index by turning it into a
// lane-wise select: splat the new element and the index, compare the index
// splat against {0,1,2,...}, and VSELECT between the splat and the original
// vector.
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0); // Vector being inserted into.
  SDValue Op1 = Op.getOperand(1); // Element to insert.
  SDValue Op2 = Op.getOperand(2); // Insertion index.

  // Constant indices are handled by tablegen patterns.
  if (isa<ConstantSDNode>(Op2))
    return Op;

  MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
  MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);

  if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
    return SDValue();

  SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
  SmallVector<SDValue, 32> RawIndices;
  SDValue SplatIdx;
  SDValue Indices;

  if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
    // On LA32 an i64 lane is assembled from (lo, hi) i32 pairs; the index is
    // assumed to fit in 32 bits, so the high word of each pair is zero.
    MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(Op2);
      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
    }
    SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
    SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);

    RawIndices.clear();
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
    }
    Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
    Indices = DAG.getBitcast(IdxVTy, Indices);
  } else {
    SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);

    for (unsigned i = 0; i < NumElts; ++i)
      RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
  }

  // insert vec, elt, idx
  // =>
  // select (splatidx == {0,1,2...}) ? splatelt : vec
  SDValue SelectCC =
      DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
  return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
}
3451
3452SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3453 SelectionDAG &DAG) const {
3454 SDLoc DL(Op);
3455 SyncScope::ID FenceSSID =
3456 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3457
3458 // singlethread fences only synchronize with signal handlers on the same
3459 // thread and thus only need to preserve instruction order, not actually
3460 // enforce memory ordering.
3461 if (FenceSSID == SyncScope::SingleThread)
3462 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3463 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3464
3465 return Op;
3466}
3467
3468SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3469 SelectionDAG &DAG) const {
3470
3471 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3472 DAG.getContext()->emitError(
3473 "On LA64, only 64-bit registers can be written.");
3474 return Op.getOperand(0);
3475 }
3476
3477 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3478 DAG.getContext()->emitError(
3479 "On LA32, only 32-bit registers can be written.");
3480 return Op.getOperand(0);
3481 }
3482
3483 return Op;
3484}
3485
// Lower FRAMEADDR: return the frame pointer for the requested frame depth.
// Parent frame addresses are loaded from a fixed negative offset off the
// current frame pointer.
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The depth argument must be a compile-time constant.
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  // Walk up the chain of saved frame pointers, one load per frame.
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
3512
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}
3532
3533SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3534 SelectionDAG &DAG) const {
3535 MachineFunction &MF = DAG.getMachineFunction();
3536 auto Size = Subtarget.getGRLen() / 8;
3537 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3538 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3539}
3540
// Lower VASTART by storing the address of the varargs save area (recorded in
// the function info during argument lowering) to the va_list slot.
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
3556
3557SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3558 SelectionDAG &DAG) const {
3559 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3560 !Subtarget.hasBasicD() && "unexpected target features");
3561
3562 SDLoc DL(Op);
3563 SDValue Op0 = Op.getOperand(0);
3564 if (Op0->getOpcode() == ISD::AND) {
3565 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3566 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3567 return Op;
3568 }
3569
3570 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3571 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3572 Op0.getConstantOperandVal(2) == UINT64_C(0))
3573 return Op;
3574
3575 if (Op0.getOpcode() == ISD::AssertZext &&
3576 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3577 return Op;
3578
3579 EVT OpVT = Op0.getValueType();
3580 EVT RetVT = Op.getValueType();
3581 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3582 MakeLibCallOptions CallOptions;
3583 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3584 SDValue Chain = SDValue();
3586 std::tie(Result, Chain) =
3587 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3588 return Result;
3589}
3590
3591SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3592 SelectionDAG &DAG) const {
3593 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3594 !Subtarget.hasBasicD() && "unexpected target features");
3595
3596 SDLoc DL(Op);
3597 SDValue Op0 = Op.getOperand(0);
3598
3599 if ((Op0.getOpcode() == ISD::AssertSext ||
3601 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3602 return Op;
3603
3604 EVT OpVT = Op0.getValueType();
3605 EVT RetVT = Op.getValueType();
3606 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3607 MakeLibCallOptions CallOptions;
3608 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3609 SDValue Chain = SDValue();
3611 std::tie(Result, Chain) =
3612 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3613 return Result;
3614}
3615
3616SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3617 SelectionDAG &DAG) const {
3618
3619 SDLoc DL(Op);
3620 EVT VT = Op.getValueType();
3621 SDValue Op0 = Op.getOperand(0);
3622 EVT Op0VT = Op0.getValueType();
3623
3624 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3625 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3626 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3627 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3628 }
3629 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3630 SDValue Lo, Hi;
3631 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3632 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3633 }
3634 return Op;
3635}
3636
3637SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3638 SelectionDAG &DAG) const {
3639
3640 SDLoc DL(Op);
3641 SDValue Op0 = Op.getOperand(0);
3642
3643 if (Op0.getValueType() == MVT::f16)
3644 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3645
3646 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3647 !Subtarget.hasBasicD()) {
3648 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3649 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3650 }
3651
3652 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3653 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3654 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3655}
3656
// Create the target-specific node for a global address (offset is always 0
// here; callers assert it).
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
3661
// Create the target-specific node for a block address.
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
3667
// Create the target-specific node for a constant-pool entry.
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
3673
// Create the target-specific node for a jump-table index.
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
3678
// Generic symbol-address lowering shared by global/block/constant-pool/
// jump-table addresses: pick a PC-relative or GOT-based pseudo depending on
// the code model and whether the symbol is dso_local.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which
      //
      // for la32r expands to:
      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which
      //
      // for la32r expands to:
      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}
3752
3753SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3754 SelectionDAG &DAG) const {
3755 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3756 DAG.getTarget().getCodeModel());
3757}
3758
3759SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3760 SelectionDAG &DAG) const {
3761 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3762 DAG.getTarget().getCodeModel());
3763}
3764
3765SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3768 DAG.getTarget().getCodeModel());
3769}
3770
3771SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3772 SelectionDAG &DAG) const {
3773 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3774 assert(N->getOffset() == 0 && "unexpected offset in global node");
3775 auto CM = DAG.getTarget().getCodeModel();
3776 const GlobalValue *GV = N->getGlobal();
3777
3778 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3779 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3780 CM = *GCM;
3781 }
3782
3783 return getAddr(N, DAG, CM, GV->isDSOLocal());
3784}
3785
// Lower an Initial-Exec or Local-Exec TLS access: materialize the TLS offset
// with the given pseudo and (except for non-large LE, where the pseudo does
// it itself) add the thread pointer ($tp / R2).
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
3824
// Lower a General-Dynamic / Local-Dynamic TLS access: compute the GOT slot
// address with the given pseudo and call __tls_get_addr on it.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  Args.emplace_back(Load, CallTy);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
3856
3857SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3858 SelectionDAG &DAG, unsigned Opc,
3859 bool Large) const {
3860 SDLoc DL(N);
3861 EVT Ty = getPointerTy(DAG.getDataLayout());
3862 const GlobalValue *GV = N->getGlobal();
3863
3864 // This is not actually used, but is necessary for successfully matching the
3865 // PseudoLA_*_LARGE nodes.
3866 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3867
3868 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3869 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3870 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3871 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3872 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3873}
3874
// Lower a TLS global address by dispatching on the TLS model chosen by the
// target machine, using TLSDESC when enabled instead of the GD/LD
// __tls_get_addr sequences.
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    reportFatalUsageError("the emulated TLS is prohibited");

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false, Large);
  }

  // GD/LD with TLSDESC enabled fall through to the descriptor sequence.
  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
3933
// Verify that the intrinsic operand at index ImmOp fits in N bits (signed if
// IsSigned, unsigned otherwise). Returns an empty SDValue when the immediate
// is in range; otherwise emits a diagnostic and returns UNDEF of the op's
// type so lowering can continue.
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}
3947
// Custom-lower ISD::INTRINSIC_WO_CHAIN. Most of the cases only validate that
// the immediate operand of an LSX/LASX intrinsic fits its encoding field
// (via checkIntrinsicImmArg); an out-of-range immediate produces a
// diagnostic and an UNDEF result, while a valid one returns an empty
// SDValue so default lowering proceeds.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in $tp (R2).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG); // uimm1 at operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG); // uimm2 at operand 2.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG); // uimm2 at operand 3.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG); // uimm3 at operand 2.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG); // uimm3 at operand 3.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG); // uimm4 at operand 2.
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG); // uimm4 at operand 3.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG); // uimm5 at operand 2.
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); // simm5.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG); // uimm5 at operand 3.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG); // uimm6 at operand 2.
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG); // uimm6 at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG); // uimm7 at operand 3.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG); // uimm8 at operand 2.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG); // uimm8 at operand 3.
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); // simm10.
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); // simm13.
  }
}
4225
4226// Helper function that emits error message for intrinsics with chain and return
4227// merge values of a UNDEF and the chain.
4229 StringRef ErrorMsg,
4230 SelectionDAG &DAG) {
4231 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4232 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4233 SDLoc(Op));
4234}
4235
4236SDValue
4237LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4238 SelectionDAG &DAG) const {
4239 SDLoc DL(Op);
4240 MVT GRLenVT = Subtarget.getGRLenVT();
4241 EVT VT = Op.getValueType();
4242 SDValue Chain = Op.getOperand(0);
4243 const StringRef ErrorMsgOOR = "argument out of range";
4244 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4245 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4246
4247 switch (Op.getConstantOperandVal(1)) {
4248 default:
4249 return Op;
4250 case Intrinsic::loongarch_crc_w_b_w:
4251 case Intrinsic::loongarch_crc_w_h_w:
4252 case Intrinsic::loongarch_crc_w_w_w:
4253 case Intrinsic::loongarch_crc_w_d_w:
4254 case Intrinsic::loongarch_crcc_w_b_w:
4255 case Intrinsic::loongarch_crcc_w_h_w:
4256 case Intrinsic::loongarch_crcc_w_w_w:
4257 case Intrinsic::loongarch_crcc_w_d_w:
4258 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4259 case Intrinsic::loongarch_csrrd_w:
4260 case Intrinsic::loongarch_csrrd_d: {
4261 unsigned Imm = Op.getConstantOperandVal(2);
4262 return !isUInt<14>(Imm)
4263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4264 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4265 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4266 }
4267 case Intrinsic::loongarch_csrwr_w:
4268 case Intrinsic::loongarch_csrwr_d: {
4269 unsigned Imm = Op.getConstantOperandVal(3);
4270 return !isUInt<14>(Imm)
4271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4272 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4273 {Chain, Op.getOperand(2),
4274 DAG.getConstant(Imm, DL, GRLenVT)});
4275 }
4276 case Intrinsic::loongarch_csrxchg_w:
4277 case Intrinsic::loongarch_csrxchg_d: {
4278 unsigned Imm = Op.getConstantOperandVal(4);
4279 return !isUInt<14>(Imm)
4280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4281 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4282 {Chain, Op.getOperand(2), Op.getOperand(3),
4283 DAG.getConstant(Imm, DL, GRLenVT)});
4284 }
4285 case Intrinsic::loongarch_iocsrrd_d: {
4286 return DAG.getNode(
4287 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4288 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4289 }
4290#define IOCSRRD_CASE(NAME, NODE) \
4291 case Intrinsic::loongarch_##NAME: { \
4292 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4293 {Chain, Op.getOperand(2)}); \
4294 }
4295 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4296 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4297 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4298#undef IOCSRRD_CASE
4299 case Intrinsic::loongarch_cpucfg: {
4300 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4301 {Chain, Op.getOperand(2)});
4302 }
4303 case Intrinsic::loongarch_lddir_d: {
4304 unsigned Imm = Op.getConstantOperandVal(3);
4305 return !isUInt<8>(Imm)
4306 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4307 : Op;
4308 }
4309 case Intrinsic::loongarch_movfcsr2gr: {
4310 if (!Subtarget.hasBasicF())
4311 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4312 unsigned Imm = Op.getConstantOperandVal(2);
4313 return !isUInt<2>(Imm)
4314 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4315 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4316 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4317 }
4318 case Intrinsic::loongarch_lsx_vld:
4319 case Intrinsic::loongarch_lsx_vldrepl_b:
4320 case Intrinsic::loongarch_lasx_xvld:
4321 case Intrinsic::loongarch_lasx_xvldrepl_b:
4322 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4323 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4324 : SDValue();
4325 case Intrinsic::loongarch_lsx_vldrepl_h:
4326 case Intrinsic::loongarch_lasx_xvldrepl_h:
4327 return !isShiftedInt<11, 1>(
4328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4330 Op, "argument out of range or not a multiple of 2", DAG)
4331 : SDValue();
4332 case Intrinsic::loongarch_lsx_vldrepl_w:
4333 case Intrinsic::loongarch_lasx_xvldrepl_w:
4334 return !isShiftedInt<10, 2>(
4335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4337 Op, "argument out of range or not a multiple of 4", DAG)
4338 : SDValue();
4339 case Intrinsic::loongarch_lsx_vldrepl_d:
4340 case Intrinsic::loongarch_lasx_xvldrepl_d:
4341 return !isShiftedInt<9, 3>(
4342 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4344 Op, "argument out of range or not a multiple of 8", DAG)
4345 : SDValue();
4346 }
4347}
4348
4349// Helper function that emits error message for intrinsics with void return
4350// value and return the chain.
4352 SelectionDAG &DAG) {
4353
4354 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4355 return Op.getOperand(0);
4356}
4357
4358SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4359 SelectionDAG &DAG) const {
4360 SDLoc DL(Op);
4361 MVT GRLenVT = Subtarget.getGRLenVT();
4362 SDValue Chain = Op.getOperand(0);
4363 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4364 SDValue Op2 = Op.getOperand(2);
4365 const StringRef ErrorMsgOOR = "argument out of range";
4366 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4367 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4368 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4369
4370 switch (IntrinsicEnum) {
4371 default:
4372 // TODO: Add more Intrinsics.
4373 return SDValue();
4374 case Intrinsic::loongarch_cacop_d:
4375 case Intrinsic::loongarch_cacop_w: {
4376 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4378 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4379 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4380 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4381 unsigned Imm1 = Op2->getAsZExtVal();
4382 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4383 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4384 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4385 return Op;
4386 }
4387 case Intrinsic::loongarch_dbar: {
4388 unsigned Imm = Op2->getAsZExtVal();
4389 return !isUInt<15>(Imm)
4390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4391 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4392 DAG.getConstant(Imm, DL, GRLenVT));
4393 }
4394 case Intrinsic::loongarch_ibar: {
4395 unsigned Imm = Op2->getAsZExtVal();
4396 return !isUInt<15>(Imm)
4397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4398 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4399 DAG.getConstant(Imm, DL, GRLenVT));
4400 }
4401 case Intrinsic::loongarch_break: {
4402 unsigned Imm = Op2->getAsZExtVal();
4403 return !isUInt<15>(Imm)
4404 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4405 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4406 DAG.getConstant(Imm, DL, GRLenVT));
4407 }
4408 case Intrinsic::loongarch_movgr2fcsr: {
4409 if (!Subtarget.hasBasicF())
4410 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4411 unsigned Imm = Op2->getAsZExtVal();
4412 return !isUInt<2>(Imm)
4413 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4414 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4415 DAG.getConstant(Imm, DL, GRLenVT),
4416 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4417 Op.getOperand(3)));
4418 }
4419 case Intrinsic::loongarch_syscall: {
4420 unsigned Imm = Op2->getAsZExtVal();
4421 return !isUInt<15>(Imm)
4422 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4423 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4424 DAG.getConstant(Imm, DL, GRLenVT));
4425 }
4426#define IOCSRWR_CASE(NAME, NODE) \
4427 case Intrinsic::loongarch_##NAME: { \
4428 SDValue Op3 = Op.getOperand(3); \
4429 return Subtarget.is64Bit() \
4430 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4432 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4433 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4434 Op3); \
4435 }
4436 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4437 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4438 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4439#undef IOCSRWR_CASE
4440 case Intrinsic::loongarch_iocsrwr_d: {
4441 return !Subtarget.is64Bit()
4442 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4443 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4444 Op2,
4445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4446 Op.getOperand(3)));
4447 }
4448#define ASRT_LE_GT_CASE(NAME) \
4449 case Intrinsic::loongarch_##NAME: { \
4450 return !Subtarget.is64Bit() \
4451 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4452 : Op; \
4453 }
4454 ASRT_LE_GT_CASE(asrtle_d)
4455 ASRT_LE_GT_CASE(asrtgt_d)
4456#undef ASRT_LE_GT_CASE
4457 case Intrinsic::loongarch_ldpte_d: {
4458 unsigned Imm = Op.getConstantOperandVal(3);
4459 return !Subtarget.is64Bit()
4460 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4461 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4462 : Op;
4463 }
4464 case Intrinsic::loongarch_lsx_vst:
4465 case Intrinsic::loongarch_lasx_xvst:
4466 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4468 : SDValue();
4469 case Intrinsic::loongarch_lasx_xvstelm_b:
4470 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4471 !isUInt<5>(Op.getConstantOperandVal(5)))
4472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4473 : SDValue();
4474 case Intrinsic::loongarch_lsx_vstelm_b:
4475 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4476 !isUInt<4>(Op.getConstantOperandVal(5)))
4477 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4478 : SDValue();
4479 case Intrinsic::loongarch_lasx_xvstelm_h:
4480 return (!isShiftedInt<8, 1>(
4481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4482 !isUInt<4>(Op.getConstantOperandVal(5)))
4484 Op, "argument out of range or not a multiple of 2", DAG)
4485 : SDValue();
4486 case Intrinsic::loongarch_lsx_vstelm_h:
4487 return (!isShiftedInt<8, 1>(
4488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4489 !isUInt<3>(Op.getConstantOperandVal(5)))
4491 Op, "argument out of range or not a multiple of 2", DAG)
4492 : SDValue();
4493 case Intrinsic::loongarch_lasx_xvstelm_w:
4494 return (!isShiftedInt<8, 2>(
4495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4496 !isUInt<3>(Op.getConstantOperandVal(5)))
4498 Op, "argument out of range or not a multiple of 4", DAG)
4499 : SDValue();
4500 case Intrinsic::loongarch_lsx_vstelm_w:
4501 return (!isShiftedInt<8, 2>(
4502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4503 !isUInt<2>(Op.getConstantOperandVal(5)))
4505 Op, "argument out of range or not a multiple of 4", DAG)
4506 : SDValue();
4507 case Intrinsic::loongarch_lasx_xvstelm_d:
4508 return (!isShiftedInt<8, 3>(
4509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4510 !isUInt<2>(Op.getConstantOperandVal(5)))
4512 Op, "argument out of range or not a multiple of 8", DAG)
4513 : SDValue();
4514 case Intrinsic::loongarch_lsx_vstelm_d:
4515 return (!isShiftedInt<8, 3>(
4516 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4517 !isUInt<1>(Op.getConstantOperandVal(5)))
4519 Op, "argument out of range or not a multiple of 8", DAG)
4520 : SDValue();
4521 }
4522}
4523
4524SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4525 SelectionDAG &DAG) const {
4526 SDLoc DL(Op);
4527 SDValue Lo = Op.getOperand(0);
4528 SDValue Hi = Op.getOperand(1);
4529 SDValue Shamt = Op.getOperand(2);
4530 EVT VT = Lo.getValueType();
4531
4532 // if Shamt-GRLen < 0: // Shamt < GRLen
4533 // Lo = Lo << Shamt
4534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4535 // else:
4536 // Lo = 0
4537 // Hi = Lo << (Shamt-GRLen)
4538
4539 SDValue Zero = DAG.getConstant(0, DL, VT);
4540 SDValue One = DAG.getConstant(1, DL, VT);
4541 SDValue MinusGRLen =
4542 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4543 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4544 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4545 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4546
4547 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4548 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4549 SDValue ShiftRightLo =
4550 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4551 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4552 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4553 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4554
4555 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4556
4557 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4558 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4559
4560 SDValue Parts[2] = {Lo, Hi};
4561 return DAG.getMergeValues(Parts, DL);
4562}
4563
4564SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4565 SelectionDAG &DAG,
4566 bool IsSRA) const {
4567 SDLoc DL(Op);
4568 SDValue Lo = Op.getOperand(0);
4569 SDValue Hi = Op.getOperand(1);
4570 SDValue Shamt = Op.getOperand(2);
4571 EVT VT = Lo.getValueType();
4572
4573 // SRA expansion:
4574 // if Shamt-GRLen < 0: // Shamt < GRLen
4575 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4576 // Hi = Hi >>s Shamt
4577 // else:
4578 // Lo = Hi >>s (Shamt-GRLen);
4579 // Hi = Hi >>s (GRLen-1)
4580 //
4581 // SRL expansion:
4582 // if Shamt-GRLen < 0: // Shamt < GRLen
4583 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4584 // Hi = Hi >>u Shamt
4585 // else:
4586 // Lo = Hi >>u (Shamt-GRLen);
4587 // Hi = 0;
4588
4589 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4590
4591 SDValue Zero = DAG.getConstant(0, DL, VT);
4592 SDValue One = DAG.getConstant(1, DL, VT);
4593 SDValue MinusGRLen =
4594 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4595 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4596 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4597 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4598
4599 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4600 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4601 SDValue ShiftLeftHi =
4602 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4603 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4604 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4605 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4606 SDValue HiFalse =
4607 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4608
4609 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4610
4611 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4612 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4613
4614 SDValue Parts[2] = {Lo, Hi};
4615 return DAG.getMergeValues(Parts, DL);
4616}
4617
4618// Returns the opcode of the target-specific SDNode that implements the 32-bit
4619// form of the given Opcode.
4620static unsigned getLoongArchWOpcode(unsigned Opcode) {
4621 switch (Opcode) {
4622 default:
4623 llvm_unreachable("Unexpected opcode");
4624 case ISD::SDIV:
4625 return LoongArchISD::DIV_W;
4626 case ISD::UDIV:
4627 return LoongArchISD::DIV_WU;
4628 case ISD::SREM:
4629 return LoongArchISD::MOD_W;
4630 case ISD::UREM:
4631 return LoongArchISD::MOD_WU;
4632 case ISD::SHL:
4633 return LoongArchISD::SLL_W;
4634 case ISD::SRA:
4635 return LoongArchISD::SRA_W;
4636 case ISD::SRL:
4637 return LoongArchISD::SRL_W;
4638 case ISD::ROTL:
4639 case ISD::ROTR:
4640 return LoongArchISD::ROTR_W;
4641 case ISD::CTTZ:
4642 return LoongArchISD::CTZ_W;
4643 case ISD::CTLZ:
4644 return LoongArchISD::CLZ_W;
4645 }
4646}
4647
4648// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4649// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4650// otherwise be promoted to i64, making it difficult to select the
4651// SLL_W/.../*W later one because the fact the operation was originally of
4652// type i8/i16/i32 is lost.
4654 unsigned ExtOpc = ISD::ANY_EXTEND) {
4655 SDLoc DL(N);
4656 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4657 SDValue NewOp0, NewRes;
4658
4659 switch (NumOp) {
4660 default:
4661 llvm_unreachable("Unexpected NumOp");
4662 case 1: {
4663 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4664 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4665 break;
4666 }
4667 case 2: {
4668 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4669 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4670 if (N->getOpcode() == ISD::ROTL) {
4671 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4672 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4673 }
4674 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4675 break;
4676 }
4677 // TODO:Handle more NumOp.
4678 }
4679
4680 // ReplaceNodeResults requires we maintain the same type for the return
4681 // value.
4682 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4683}
4684
4685// Converts the given 32-bit operation to a i64 operation with signed extension
4686// semantic to reduce the signed extension instructions.
4688 SDLoc DL(N);
4689 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4690 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4691 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4692 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4693 DAG.getValueType(MVT::i32));
4694 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4695}
4696
4697// Helper function that emits error message for intrinsics with/without chain
4698// and return a UNDEF or and the chain as the results.
4701 StringRef ErrorMsg, bool WithChain = true) {
4702 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4703 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4704 if (!WithChain)
4705 return;
4706 Results.push_back(N->getOperand(0));
4707}
4708
4709template <unsigned N>
4710static void
4712 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4713 unsigned ResOp) {
4714 const StringRef ErrorMsgOOR = "argument out of range";
4715 unsigned Imm = Node->getConstantOperandVal(2);
4716 if (!isUInt<N>(Imm)) {
4718 /*WithChain=*/false);
4719 return;
4720 }
4721 SDLoc DL(Node);
4722 SDValue Vec = Node->getOperand(1);
4723
4724 SDValue PickElt =
4725 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4726 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4728 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4729 PickElt.getValue(0)));
4730}
4731
4734 SelectionDAG &DAG,
4735 const LoongArchSubtarget &Subtarget,
4736 unsigned ResOp) {
4737 SDLoc DL(N);
4738 SDValue Vec = N->getOperand(1);
4739
4740 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4741 Results.push_back(
4742 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4743}
4744
// Replace the results of an ISD::INTRINSIC_WO_CHAIN node whose result type is
// illegal: dispatches on the intrinsic ID (operand 0) to the vpickve2gr and
// vector bz/bnz helpers above.
4745 static void
4747 SelectionDAG &DAG,
4748 const LoongArchSubtarget &Subtarget) {
4749 switch (N->getConstantOperandVal(0)) {
4750 default:
4751 llvm_unreachable("Unexpected Intrinsic.");
// Signed element picks; the template argument is the lane-index bit width
// (4 bits for 16 byte lanes, 3 for 8 halfword lanes, 2 for 4 word lanes).
4752 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4753 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4754 LoongArchISD::VPICK_SEXT_ELT);
4755 break;
4756 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4757 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4758 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4759 LoongArchISD::VPICK_SEXT_ELT);
4760 break;
4761 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4762 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4763 LoongArchISD::VPICK_SEXT_ELT);
4764 break;
// Unsigned element picks.
4765 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4766 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4767 LoongArchISD::VPICK_ZEXT_ELT);
4768 break;
4769 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4770 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4771 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4772 LoongArchISD::VPICK_ZEXT_ELT);
4773 break;
4774 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4775 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4776 LoongArchISD::VPICK_ZEXT_ELT);
4777 break;
// Per-element "all elements zero" tests.
4778 case Intrinsic::loongarch_lsx_bz_b:
4779 case Intrinsic::loongarch_lsx_bz_h:
4780 case Intrinsic::loongarch_lsx_bz_w:
4781 case Intrinsic::loongarch_lsx_bz_d:
4782 case Intrinsic::loongarch_lasx_xbz_b:
4783 case Intrinsic::loongarch_lasx_xbz_h:
4784 case Intrinsic::loongarch_lasx_xbz_w:
4785 case Intrinsic::loongarch_lasx_xbz_d:
4786 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4787 LoongArchISD::VALL_ZERO);
4788 break;
// Whole-vector "any element zero" tests.
4789 case Intrinsic::loongarch_lsx_bz_v:
4790 case Intrinsic::loongarch_lasx_xbz_v:
4791 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4792 LoongArchISD::VANY_ZERO);
4793 break;
// Per-element "all elements nonzero" tests.
4794 case Intrinsic::loongarch_lsx_bnz_b:
4795 case Intrinsic::loongarch_lsx_bnz_h:
4796 case Intrinsic::loongarch_lsx_bnz_w:
4797 case Intrinsic::loongarch_lsx_bnz_d:
4798 case Intrinsic::loongarch_lasx_xbnz_b:
4799 case Intrinsic::loongarch_lasx_xbnz_h:
4800 case Intrinsic::loongarch_lasx_xbnz_w:
4801 case Intrinsic::loongarch_lasx_xbnz_d:
4802 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4803 LoongArchISD::VALL_NONZERO);
4804 break;
// Whole-vector "any element nonzero" tests.
4805 case Intrinsic::loongarch_lsx_bnz_v:
4806 case Intrinsic::loongarch_lasx_xbnz_v:
4807 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4808 LoongArchISD::VANY_NONZERO);
4809 break;
4810 }
4811 }
4812
// Replace an i128 ATOMIC_CMP_SWAP with a target cmpxchg-128 pseudo: split the
// compare and new values into i64 halves, emit the pseudo machine node, and
// rebuild the i128 result from the two i64 outputs.
4815 SelectionDAG &DAG) {
4816 assert(N->getValueType(0) == MVT::i128 &&
4817 "AtomicCmpSwap on types less than 128 should be legal");
4818 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4819
// Select the pseudo according to the merged memory ordering of the access.
4820 unsigned Opcode;
4821 switch (MemOp->getMergedOrdering()) {
4825 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4826 break;
4829 Opcode = LoongArch::PseudoCmpXchg128;
4830 break;
4831 default:
4832 llvm_unreachable("Unexpected ordering!");
4833 }
4834
// Operand order: address, cmp lo/hi, new lo/hi, then the incoming chain.
4835 SDLoc DL(N);
4836 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4837 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4838 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4839 NewVal.first, NewVal.second, N->getOperand(0)};
4840
4841 SDNode *CmpSwap = DAG.getMachineNode(
4842 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4843 Ops);
// Preserve the memory operand so alias analysis / scheduling stay correct.
4844 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
// Results: the i128 loaded value (pair of i64 halves) and the output chain.
4845 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4846 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4847 Results.push_back(SDValue(CmpSwap, 3));
4848 }
4849
// Custom result-type legalization (TargetLowering::ReplaceNodeResults body):
// replaces the results of nodes whose result type is illegal for the target,
// typically by performing the operation at GRLen width and truncating, by
// emitting a libcall, or by expanding intrinsics to LoongArch ISD nodes.
4852 SDLoc DL(N);
4853 EVT VT = N->getValueType(0);
4854 switch (N->getOpcode()) {
4855 default:
4856 llvm_unreachable("Don't know how to legalize this operation");
4857 case ISD::ADD:
4858 case ISD::SUB:
4859 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4860 "Unexpected custom legalisation");
4861 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4862 break;
4863 case ISD::SDIV:
4864 case ISD::UDIV:
4865 case ISD::SREM:
4866 case ISD::UREM:
4867 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4868 "Unexpected custom legalisation");
// With Div32 the 32-bit divide ignores upper bits, so ANY_EXTEND-style
// widening suffices; otherwise operands must be sign-extended first.
4869 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4870 Subtarget.hasDiv32() && VT == MVT::i32
4872 : ISD::SIGN_EXTEND));
4873 break;
4874 case ISD::SHL:
4875 case ISD::SRA:
4876 case ISD::SRL:
4877 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4878 "Unexpected custom legalisation");
// Constant shift amounts are left for pattern matching; only variable
// shifts are widened here.
4879 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4880 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4881 break;
4882 }
4883 break;
4884 case ISD::ROTL:
4885 case ISD::ROTR:
4886 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4887 "Unexpected custom legalisation");
4888 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4889 break;
4890 case ISD::FP_TO_SINT: {
4891 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4892 "Unexpected custom legalisation");
4893 SDValue Src = N->getOperand(0);
4894 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4895 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4897 if (!isTypeLegal(Src.getValueType()))
4898 return;
// f16 has no direct FTINT; extend to f32 first.
4899 if (Src.getValueType() == MVT::f16)
4900 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4901 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4902 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4903 return;
4904 }
4905 // If the FP type needs to be softened, emit a library call using the 'si'
4906 // version. If we left it to default legalization we'd end up with 'di'.
4907 RTLIB::Libcall LC;
4908 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4909 MakeLibCallOptions CallOptions;
4910 EVT OpVT = Src.getValueType();
4911 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4912 SDValue Chain = SDValue();
4913 SDValue Result;
4914 std::tie(Result, Chain) =
4915 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4916 Results.push_back(Result);
4917 break;
4918 }
4919 case ISD::BITCAST: {
4920 SDValue Src = N->getOperand(0);
4921 EVT SrcVT = Src.getValueType();
// f32 -> i32 on LA64 goes through a 64-bit FPR move plus truncate; the
// i64 <- f64 case on LA32 splits the double into an i32 register pair.
4922 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4923 Subtarget.hasBasicF()) {
4924 SDValue Dst =
4925 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4927 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4928 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4929 DAG.getVTList(MVT::i32, MVT::i32), Src);
4930 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4931 NewReg.getValue(0), NewReg.getValue(1));
4932 Results.push_back(RetReg);
4933 }
4934 break;
4935 }
4936 case ISD::FP_TO_UINT: {
4937 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4938 "Unexpected custom legalisation");
4939 auto &TLI = DAG.getTargetLoweringInfo();
4940 SDValue Tmp1, Tmp2;
4941 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4942 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4943 break;
4944 }
4945 case ISD::BSWAP: {
4946 SDValue Src = N->getOperand(0);
4947 assert((VT == MVT::i16 || VT == MVT::i32) &&
4948 "Unexpected custom legalization");
4949 MVT GRLenVT = Subtarget.getGRLenVT();
4950 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4951 SDValue Tmp;
4952 switch (VT.getSizeInBits()) {
4953 default:
4954 llvm_unreachable("Unexpected operand width");
4955 case 16:
4956 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4957 break;
4958 case 32:
4959 // Only LA64 will get to here due to the size mismatch between VT and
4960 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4961 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4962 break;
4963 }
4964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4965 break;
4966 }
4967 case ISD::BITREVERSE: {
4968 SDValue Src = N->getOperand(0);
4969 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4970 "Unexpected custom legalization");
4971 MVT GRLenVT = Subtarget.getGRLenVT();
4972 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4973 SDValue Tmp;
4974 switch (VT.getSizeInBits()) {
4975 default:
4976 llvm_unreachable("Unexpected operand width");
4977 case 8:
4978 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4979 break;
4980 case 32:
4981 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4982 break;
4983 }
4984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4985 break;
4986 }
4987 case ISD::CTLZ:
4988 case ISD::CTTZ: {
4989 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4990 "Unexpected custom legalisation");
4991 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4992 break;
4993 }
// Chained intrinsics: validate feature requirements / immediate ranges,
// then expand to the corresponding LoongArch ISD node at i64/GRLen width.
4995 SDValue Chain = N->getOperand(0);
4996 SDValue Op2 = N->getOperand(2);
4997 MVT GRLenVT = Subtarget.getGRLenVT();
4998 const StringRef ErrorMsgOOR = "argument out of range";
4999 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5000 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5001
5002 switch (N->getConstantOperandVal(1)) {
5003 default:
5004 llvm_unreachable("Unexpected Intrinsic.");
5005 case Intrinsic::loongarch_movfcsr2gr: {
5006 if (!Subtarget.hasBasicF()) {
5007 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5008 return;
5009 }
5010 unsigned Imm = Op2->getAsZExtVal();
5011 if (!isUInt<2>(Imm)) {
5012 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5013 return;
5014 }
5015 SDValue MOVFCSR2GRResults = DAG.getNode(
5016 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5017 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5018 Results.push_back(
5019 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5020 Results.push_back(MOVFCSR2GRResults.getValue(1));
5021 break;
5022 }
5023#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5024 case Intrinsic::loongarch_##NAME: { \
5025 SDValue NODE = DAG.getNode( \
5026 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5027 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5028 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5029 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5030 Results.push_back(NODE.getValue(1)); \
5031 break; \
5032 }
5033 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5034 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5035 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5036 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5037 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5038 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5039#undef CRC_CASE_EXT_BINARYOP
5040
5041#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5042 case Intrinsic::loongarch_##NAME: { \
5043 SDValue NODE = DAG.getNode( \
5044 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5045 {Chain, Op2, \
5046 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5047 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5048 Results.push_back(NODE.getValue(1)); \
5049 break; \
5050 }
5051 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5052 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5053#undef CRC_CASE_EXT_UNARYOP
5054#define CSR_CASE(ID) \
5055 case Intrinsic::loongarch_##ID: { \
5056 if (!Subtarget.is64Bit()) \
5057 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5058 break; \
5059 }
5060 CSR_CASE(csrrd_d);
5061 CSR_CASE(csrwr_d);
5062 CSR_CASE(csrxchg_d);
5063 CSR_CASE(iocsrrd_d);
5064#undef CSR_CASE
5065 case Intrinsic::loongarch_csrrd_w: {
5066 unsigned Imm = Op2->getAsZExtVal();
// CSR addresses are 14-bit unsigned immediates.
5067 if (!isUInt<14>(Imm)) {
5068 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5069 return;
5070 }
5071 SDValue CSRRDResults =
5072 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5073 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5074 Results.push_back(
5075 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5076 Results.push_back(CSRRDResults.getValue(1));
5077 break;
5078 }
5079 case Intrinsic::loongarch_csrwr_w: {
5080 unsigned Imm = N->getConstantOperandVal(3);
5081 if (!isUInt<14>(Imm)) {
5082 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5083 return;
5084 }
5085 SDValue CSRWRResults =
5086 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5087 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5088 DAG.getConstant(Imm, DL, GRLenVT)});
5089 Results.push_back(
5090 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5091 Results.push_back(CSRWRResults.getValue(1));
5092 break;
5093 }
5094 case Intrinsic::loongarch_csrxchg_w: {
5095 unsigned Imm = N->getConstantOperandVal(4);
5096 if (!isUInt<14>(Imm)) {
5097 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5098 return;
5099 }
5100 SDValue CSRXCHGResults = DAG.getNode(
5101 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5102 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5103 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5104 DAG.getConstant(Imm, DL, GRLenVT)});
5105 Results.push_back(
5106 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5107 Results.push_back(CSRXCHGResults.getValue(1));
5108 break;
5109 }
5110#define IOCSRRD_CASE(NAME, NODE) \
5111 case Intrinsic::loongarch_##NAME: { \
5112 SDValue IOCSRRDResults = \
5113 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5114 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5115 Results.push_back( \
5116 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5117 Results.push_back(IOCSRRDResults.getValue(1)); \
5118 break; \
5119 }
5120 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5121 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5122 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5123#undef IOCSRRD_CASE
5124 case Intrinsic::loongarch_cpucfg: {
5125 SDValue CPUCFGResults =
5126 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5127 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5128 Results.push_back(
5129 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5130 Results.push_back(CPUCFGResults.getValue(1));
5131 break;
5132 }
5133 case Intrinsic::loongarch_lddir_d: {
5134 if (!Subtarget.is64Bit()) {
5135 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5136 return;
5137 }
5138 break;
5139 }
5140 }
5141 break;
5142 }
5143 case ISD::READ_REGISTER: {
// Reading a register narrower than GRLen is unsupported; report and
// continue with UNDEF plus the original chain.
5144 if (Subtarget.is64Bit())
5145 DAG.getContext()->emitError(
5146 "On LA64, only 64-bit registers can be read.");
5147 else
5148 DAG.getContext()->emitError(
5149 "On LA32, only 32-bit registers can be read.");
5150 Results.push_back(DAG.getUNDEF(VT));
5151 Results.push_back(N->getOperand(0));
5152 break;
5153 }
5155 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5156 break;
5157 }
5158 case ISD::LROUND: {
// Always use the 'si'-result libcall and truncate, mirroring FP_TO_SINT.
5159 SDValue Op0 = N->getOperand(0);
5160 EVT OpVT = Op0.getValueType();
5161 RTLIB::Libcall LC =
5162 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5163 MakeLibCallOptions CallOptions;
5164 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5165 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5166 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5167 Results.push_back(Result);
5168 break;
5169 }
5170 case ISD::ATOMIC_CMP_SWAP: {
5172 break;
5173 }
5174 case ISD::TRUNCATE: {
// Vector truncate whose result needs widening: implement as a shuffle
// that picks every Scale-th lane of the (widened) 128-bit source.
5175 MVT VT = N->getSimpleValueType(0);
5176 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5177 return;
5178
5179 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5180 SDValue In = N->getOperand(0);
5181 EVT InVT = In.getValueType();
5182 EVT InEltVT = InVT.getVectorElementType();
5183 EVT EltVT = VT.getVectorElementType();
5184 unsigned MinElts = VT.getVectorNumElements();
5185 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5186 unsigned InBits = InVT.getSizeInBits();
5187
5188 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5189 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5190 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5191 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5192 for (unsigned I = 0; I < MinElts; ++I)
5193 TruncMask[I] = Scale * I;
5194
5195 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5196 MVT SVT = In.getSimpleValueType().getScalarType();
5197 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5198 SDValue WidenIn =
5199 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5200 DAG.getVectorIdxConstant(0, DL));
5201 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5202 "Illegal vector type in truncation");
5203 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5204 Results.push_back(
5205 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5206 return;
5207 }
5208 }
5209
5210 break;
5211 }
5212 }
5213 }
5214
5215 /// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5217 SelectionDAG &DAG) {
5218 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5219
// Only 128-/256-bit vector ANDs can map onto [x]vandn.
5220 MVT VT = N->getSimpleValueType(0);
5221 if (!VT.is128BitVector() && !VT.is256BitVector())
5222 return SDValue();
5223
5224 SDValue X, Y;
5225 SDValue N0 = N->getOperand(0);
5226 SDValue N1 = N->getOperand(1);
5227
// AND is commutative: accept the NOT on either operand.
5228 if (SDValue Not = isNOT(N0, DAG)) {
5229 X = Not;
5230 Y = N1;
5231 } else if (SDValue Not = isNOT(N1, DAG)) {
5232 X = Not;
5233 Y = N0;
5234 } else
5235 return SDValue();
5236
// isNOT may look through bitcasts, so normalize both sides back to VT.
5237 X = DAG.getBitcast(VT, X);
5238 Y = DAG.getBitcast(VT, Y);
5239 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5240 }
5241
// DAG combine for ISD::AND: first try the VANDN vector fold, then try to
// match the scalar BSTRPICK (bitfield-pick) patterns described inline below.
5244 const LoongArchSubtarget &Subtarget) {
5245 if (DCI.isBeforeLegalizeOps())
5246 return SDValue();
5247
5248 SDValue FirstOperand = N->getOperand(0);
5249 SDValue SecondOperand = N->getOperand(1);
5250 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5251 EVT ValTy = N->getValueType(0);
5252 SDLoc DL(N);
5253 uint64_t lsb, msb;
5254 unsigned SMIdx, SMLen;
5255 ConstantSDNode *CN;
5256 SDValue NewOperand;
5257 MVT GRLenVT = Subtarget.getGRLenVT();
5258
5259 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5260 return R;
5261
5262 // BSTRPICK requires the 32S feature.
5263 if (!Subtarget.has32S())
5264 return SDValue();
5265
5266 // Op's second operand must be a shifted mask.
5267 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5268 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5269 return SDValue();
5270
5271 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5272 // Pattern match BSTRPICK.
5273 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5274 // => BSTRPICK $dst, $src, msb, lsb
5275 // where msb = lsb + len - 1
5276
5277 // The second operand of the shift must be an immediate.
5278 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5279 return SDValue();
5280
5281 lsb = CN->getZExtValue();
5282
5283 // Return if the shifted mask does not start at bit 0 or the sum of its
5284 // length and lsb exceeds the word's size.
5285 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5286 return SDValue();
5287
5288 NewOperand = FirstOperand.getOperand(0);
5289 } else {
5290 // Pattern match BSTRPICK.
5291 // $dst = and $src, (2**len- 1) , if len > 12
5292 // => BSTRPICK $dst, $src, msb, lsb
5293 // where lsb = 0 and msb = len - 1
5294
5295 // If the mask is <= 0xfff, andi can be used instead.
5296 if (CN->getZExtValue() <= 0xfff)
5297 return SDValue();
5298
5299 // Return if the MSB exceeds.
5300 if (SMIdx + SMLen > ValTy.getSizeInBits())
5301 return SDValue();
5302
5303 if (SMIdx > 0) {
5304 // Omit if the constant has more than 2 uses. This is a conservative
5305 // decision. Whether it is a win depends on the HW microarchitecture.
5306 // However it should always be better for 1 and 2 uses.
5307 if (CN->use_size() > 2)
5308 return SDValue();
5309 // Return if the constant can be composed by a single LU12I.W.
5310 if ((CN->getZExtValue() & 0xfff) == 0)
5311 return SDValue();
5312 // Return if the constant can be composed by a single ADDI with
5313 // the zero register.
5314 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5315 return SDValue();
5316 }
5317
5318 lsb = SMIdx;
5319 NewOperand = FirstOperand;
5320 }
5321
5322 msb = lsb + SMLen - 1;
5323 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5324 DAG.getConstant(msb, DL, GRLenVT),
5325 DAG.getConstant(lsb, DL, GRLenVT));
5326 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5327 return NR0;
5328 // Try to optimize to
5329 // bstrpick $Rd, $Rs, msb, lsb
5330 // slli $Rd, $Rd, lsb
5331 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5332 DAG.getConstant(lsb, DL, GRLenVT));
5333 }
5334
// DAG combine for ISD::SRL: fold (srl (and $src, Mask), Shamt) into a single
// BSTRPICK when Mask is a shifted mask covering the shifted-out range.
5337 const LoongArchSubtarget &Subtarget) {
5338 // BSTRPICK requires the 32S feature.
5339 if (!Subtarget.has32S())
5340 return SDValue();
5341
5342 if (DCI.isBeforeLegalizeOps())
5343 return SDValue();
5344
5345 // $dst = srl (and $src, Mask), Shamt
5346 // =>
5347 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5348 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5349 //
5350
5351 SDValue FirstOperand = N->getOperand(0);
5352 ConstantSDNode *CN;
5353 EVT ValTy = N->getValueType(0);
5354 SDLoc DL(N);
5355 MVT GRLenVT = Subtarget.getGRLenVT();
5356 unsigned MaskIdx, MaskLen;
5357 uint64_t Shamt;
5358
5359 // The first operand must be an AND and the second operand of the AND must be
5360 // a shifted mask.
5361 if (FirstOperand.getOpcode() != ISD::AND ||
5362 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5363 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5364 return SDValue();
5365
5366 // The second operand (shift amount) must be an immediate.
5367 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5368 return SDValue();
5369
5370 Shamt = CN->getZExtValue();
// Shamt within [MaskIdx, MaskIdx+MaskLen-1] means the surviving bits are
// exactly the bitfield [MaskIdx+MaskLen-1 : Shamt] of the AND's input.
5371 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5372 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5373 FirstOperand->getOperand(0),
5374 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5375 DAG.getConstant(Shamt, DL, GRLenVT));
5376
5377 return SDValue();
5378 }
5379
5380 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
5381 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5382 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5383 unsigned Depth) {
5384 // Limit recursion.
5386 return false;
5387 switch (Src.getOpcode()) {
// Leaves: the comparison/truncate input fixes the source vector size.
5388 case ISD::SETCC:
5389 case ISD::TRUNCATE:
5390 return Src.getOperand(0).getValueSizeInBits() == Size;
5391 case ISD::FREEZE:
5392 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
// Bitwise ops: both sides must trace back to the same source size.
5393 case ISD::AND:
5394 case ISD::XOR:
5395 case ISD::OR:
5396 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5397 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
// Selects: only i1 conditions are peeked through; both arms must match.
5398 case ISD::SELECT:
5399 case ISD::VSELECT:
5400 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5401 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5402 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
// All-zeros/all-ones constants are size-agnostic and always acceptable.
5403 case ISD::BUILD_VECTOR:
5404 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5405 ISD::isBuildVectorAllOnes(Src.getNode());
5406 }
// Any other opcode is opaque: conservatively report a size mismatch.
5407 return false;
5408 }
5409
5410 // Helper to push sign extension of vXi1 SETCC result through bitops.
// Recursively rebuilds the vXi1 expression at SExtVT: leaves are
// sign-extended directly, while bitops/selects are recreated at the wider
// type with sign-extended operands. Must only be called on node shapes
// accepted by checkBitcastSrcVectorSize, otherwise it is unreachable.
5412 SDValue Src, const SDLoc &DL) {
5413 switch (Src.getOpcode()) {
5414 case ISD::SETCC:
5415 case ISD::FREEZE:
5416 case ISD::TRUNCATE:
5417 case ISD::BUILD_VECTOR:
5418 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5419 case ISD::AND:
5420 case ISD::XOR:
5421 case ISD::OR:
5422 return DAG.getNode(
5423 Src.getOpcode(), DL, SExtVT,
5424 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5425 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5426 case ISD::SELECT:
5427 case ISD::VSELECT:
5428 return DAG.getSelect(
5429 DL, SExtVT, Src.getOperand(0),
5430 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5431 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5432 }
5433 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5434 }
5435
// Combine (bitcast (setcc v, constant, cc)) into a single [X]VMSK* movemask
// node when the comparison is against all-zeros/all-ones with a supported
// condition code and element type; returns SDValue() if no pattern applies.
5436 static
5439 const LoongArchSubtarget &Subtarget) {
5440 SDLoc DL(N);
5441 EVT VT = N->getValueType(0);
5442 SDValue Src = N->getOperand(0);
5443 EVT SrcVT = Src.getValueType();
5444
5445 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5446 return SDValue();
5447
5448 bool UseLASX;
// DELETED_NODE doubles as a "no matching opcode found" sentinel.
5449 unsigned Opc = ISD::DELETED_NODE;
5450 EVT CmpVT = Src.getOperand(0).getValueType();
5451 EVT EltVT = CmpVT.getVectorElementType();
5452
// 128-bit compares need LSX, 256-bit compares need LASX (plus 32S).
5453 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5454 UseLASX = false;
5455 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5456 CmpVT.getSizeInBits() == 256)
5457 UseLASX = true;
5458 else
5459 return SDValue();
5460
5461 SDValue SrcN1 = Src.getOperand(1);
5462 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5463 default:
5464 break;
5465 case ISD::SETEQ:
5466 // x == 0 => not (vmsknez.b x)
5467 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5468 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5469 break;
5470 case ISD::SETGT:
5471 // x > -1 => vmskgez.b x
5472 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5473 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5474 break;
5475 case ISD::SETGE:
5476 // x >= 0 => vmskgez.b x
5477 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5478 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5479 break;
5480 case ISD::SETLT:
5481 // x < 0 => vmskltz.{b,h,w,d} x
5482 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5483 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5484 EltVT == MVT::i64))
5485 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486 break;
5487 case ISD::SETLE:
5488 // x <= -1 => vmskltz.{b,h,w,d} x
5489 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5490 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5491 EltVT == MVT::i64))
5492 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5493 break;
5494 case ISD::SETNE:
5495 // x != 0 => vmsknez.b x
5496 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5497 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5498 break;
5499 }
5500
5501 if (Opc == ISD::DELETED_NODE)
5502 return SDValue();
5503
// The mask is produced at GRLen width, then narrowed/bitcast to the
// original bitcast result type.
5504 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5506 V = DAG.getZExtOrTrunc(V, DL, T);
5507 return DAG.getBitcast(VT, V);
5508 }
5509
// DAG combine for ISD::BITCAST from vXi1: first try the direct
// SETCC+BITCAST movemask fold, then fall back to sign-extending the vXi1
// value to a legal vector type and emitting [X]VMSKLTZ to extract the mask.
5512 const LoongArchSubtarget &Subtarget) {
5513 SDLoc DL(N);
5514 EVT VT = N->getValueType(0);
5515 SDValue Src = N->getOperand(0);
5516 EVT SrcVT = Src.getValueType();
5517 MVT GRLenVT = Subtarget.getGRLenVT();
5518
5519 if (!DCI.isBeforeLegalizeOps())
5520 return SDValue();
5521
// Only vXi1 sources are handled here.
5522 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5523 return SDValue();
5524
5525 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5526 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5527 if (Res)
5528 return Res;
5529
5530 // Generate vXi1 using [X]VMSKLTZ
5531 MVT SExtVT;
5532 unsigned Opc;
5533 bool UseLASX = false;
5534 bool PropagateSExt = false;
5535
// Give up on compares wider than the largest (256-bit LASX) vector.
5536 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5537 EVT CmpVT = Src.getOperand(0).getValueType();
5538 if (CmpVT.getSizeInBits() > 256)
5539 return SDValue();
5540 }
5541
// Pick the sign-extension type; when the i1 vector originated from a
// 256-bit source, widen via LASX and push the extension through bitops.
5542 switch (SrcVT.getSimpleVT().SimpleTy) {
5543 default:
5544 return SDValue();
5545 case MVT::v2i1:
5546 SExtVT = MVT::v2i64;
5547 break;
5548 case MVT::v4i1:
5549 SExtVT = MVT::v4i32;
5550 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5551 SExtVT = MVT::v4i64;
5552 UseLASX = true;
5553 PropagateSExt = true;
5554 }
5555 break;
5556 case MVT::v8i1:
5557 SExtVT = MVT::v8i16;
5558 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5559 SExtVT = MVT::v8i32;
5560 UseLASX = true;
5561 PropagateSExt = true;
5562 }
5563 break;
5564 case MVT::v16i1:
5565 SExtVT = MVT::v16i8;
5566 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5567 SExtVT = MVT::v16i16;
5568 UseLASX = true;
5569 PropagateSExt = true;
5570 }
5571 break;
5572 case MVT::v32i1:
5573 SExtVT = MVT::v32i8;
5574 UseLASX = true;
5575 break;
5576 };
5577 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5578 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5579
// Without LASX, a v32i8 mask is computed as two 128-bit VMSKLTZ halves
// combined with a shift+or; other LASX-only cases cannot be handled.
5580 SDValue V;
5581 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5582 if (Src.getSimpleValueType() == MVT::v32i8) {
5583 SDValue Lo, Hi;
5584 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5585 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5586 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5587 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5588 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5589 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5590 } else if (UseLASX) {
5591 return SDValue();
5592 }
5593 }
5594
5595 if (!V) {
5596 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5597 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5598 }
5599
// Narrow the GRLen-wide mask to the bitcast's integer result type.
5601 V = DAG.getZExtOrTrunc(V, DL, T);
5602 return DAG.getBitcast(VT, V);
5603 }
5604
5607 const LoongArchSubtarget &Subtarget) {
5608 MVT GRLenVT = Subtarget.getGRLenVT();
5609 EVT ValTy = N->getValueType(0);
5610 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5611 ConstantSDNode *CN0, *CN1;
5612 SDLoc DL(N);
5613 unsigned ValBits = ValTy.getSizeInBits();
5614 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5615 unsigned Shamt;
5616 bool SwapAndRetried = false;
5617
5618 // BSTRPICK requires the 32S feature.
5619 if (!Subtarget.has32S())
5620 return SDValue();
5621
5622 if (DCI.isBeforeLegalizeOps())
5623 return SDValue();
5624
5625 if (ValBits != 32 && ValBits != 64)
5626 return SDValue();
5627
5628Retry:
5629 // 1st pattern to match BSTRINS:
5630 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5631 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5632 // =>
5633 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5634 if (N0.getOpcode() == ISD::AND &&
5635 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5636 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5637 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5638 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5639 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5640 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5641 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5642 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5643 (MaskIdx0 + MaskLen0 <= ValBits)) {
5644 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5645 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5646 N1.getOperand(0).getOperand(0),
5647 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5648 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5649 }
5650
5651 // 2nd pattern to match BSTRINS:
5652 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5653 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5654 // =>
5655 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5656 if (N0.getOpcode() == ISD::AND &&
5657 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5658 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5659 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5660 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5661 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5662 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5663 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5664 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5665 (MaskIdx0 + MaskLen0 <= ValBits)) {
5666 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5667 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5668 N1.getOperand(0).getOperand(0),
5669 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5670 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5671 }
5672
5673 // 3rd pattern to match BSTRINS:
5674 // R = or (and X, mask0), (and Y, mask1)
5675 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5676 // =>
5677 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5678 // where msb = lsb + size - 1
5679 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5680 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5681 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5682 (MaskIdx0 + MaskLen0 <= 64) &&
5683 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5684 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5685 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5686 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5687 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5688 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5689 DAG.getConstant(ValBits == 32
5690 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5691 : (MaskIdx0 + MaskLen0 - 1),
5692 DL, GRLenVT),
5693 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5694 }
5695
5696 // 4th pattern to match BSTRINS:
5697 // R = or (and X, mask), (shl Y, shamt)
5698 // where mask = (2**shamt - 1)
5699 // =>
5700 // R = BSTRINS X, Y, ValBits - 1, shamt
5701 // where ValBits = 32 or 64
5702 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5703 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5704 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5705 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5706 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5707 (MaskIdx0 + MaskLen0 <= ValBits)) {
5708 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5709 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5710 N1.getOperand(0),
5711 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5712 DAG.getConstant(Shamt, DL, GRLenVT));
5713 }
5714
5715 // 5th pattern to match BSTRINS:
5716 // R = or (and X, mask), const
5717 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5718 // =>
5719 // R = BSTRINS X, (const >> lsb), msb, lsb
5720 // where msb = lsb + size - 1
5721 if (N0.getOpcode() == ISD::AND &&
5722 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5723 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5724 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5725 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5726 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5727 return DAG.getNode(
5728 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5729 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5730 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5731 : (MaskIdx0 + MaskLen0 - 1),
5732 DL, GRLenVT),
5733 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5734 }
5735
5736 // 6th pattern.
5737 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5738 // by the incoming bits are known to be zero.
5739 // =>
5740 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5741 //
5742 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5743 // pattern is more common than the 1st. So we put the 1st before the 6th in
5744 // order to match as many nodes as possible.
5745 ConstantSDNode *CNMask, *CNShamt;
5746 unsigned MaskIdx, MaskLen;
5747 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5748 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5749 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5750 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5751 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5752 Shamt = CNShamt->getZExtValue();
5753 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5754 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5755 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5756 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5757 N1.getOperand(0).getOperand(0),
5758 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5759 DAG.getConstant(Shamt, DL, GRLenVT));
5760 }
5761 }
5762
5763 // 7th pattern.
5764 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5765 // overwritten by the incoming bits are known to be zero.
5766 // =>
5767 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5768 //
5769 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5770 // before the 7th in order to match as many nodes as possible.
5771 if (N1.getOpcode() == ISD::AND &&
5772 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5773 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5774 N1.getOperand(0).getOpcode() == ISD::SHL &&
5775 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5776 CNShamt->getZExtValue() == MaskIdx) {
5777 APInt ShMask(ValBits, CNMask->getZExtValue());
5778 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5779 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5780 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5781 N1.getOperand(0).getOperand(0),
5782 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5783 DAG.getConstant(MaskIdx, DL, GRLenVT));
5784 }
5785 }
5786
5787 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5788 if (!SwapAndRetried) {
5789 std::swap(N0, N1);
5790 SwapAndRetried = true;
5791 goto Retry;
5792 }
5793
5794 SwapAndRetried = false;
5795Retry2:
5796 // 8th pattern.
5797 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5798 // the incoming bits are known to be zero.
5799 // =>
5800 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5801 //
5802 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5803 // we put it here in order to match as many nodes as possible or generate less
5804 // instructions.
5805 if (N1.getOpcode() == ISD::AND &&
5806 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5807 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5808 APInt ShMask(ValBits, CNMask->getZExtValue());
5809 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5810 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5811 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5812 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5813 N1->getOperand(0),
5814 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5815 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5816 DAG.getConstant(MaskIdx, DL, GRLenVT));
5817 }
5818 }
5819 // Swap N0/N1 and retry.
5820 if (!SwapAndRetried) {
5821 std::swap(N0, N1);
5822 SwapAndRetried = true;
5823 goto Retry2;
5824 }
5825
5826 return SDValue();
5827}
5828
5829static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5830 ExtType = ISD::NON_EXTLOAD;
5831
5832 switch (V.getNode()->getOpcode()) {
5833 case ISD::LOAD: {
5834 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5835 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5836 (LoadNode->getMemoryVT() == MVT::i16)) {
5837 ExtType = LoadNode->getExtensionType();
5838 return true;
5839 }
5840 return false;
5841 }
5842 case ISD::AssertSext: {
5843 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5844 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5845 ExtType = ISD::SEXTLOAD;
5846 return true;
5847 }
5848 return false;
5849 }
5850 case ISD::AssertZext: {
5851 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5852 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5853 ExtType = ISD::ZEXTLOAD;
5854 return true;
5855 }
5856 return false;
5857 }
5858 default:
5859 return false;
5860 }
5861
5862 return false;
5863}
5864
5865// Eliminate redundant truncation and zero-extension nodes.
5866// * Case 1:
5867// +------------+ +------------+ +------------+
5868// | Input1 | | Input2 | | CC |
5869// +------------+ +------------+ +------------+
5870// | | |
5871// V V +----+
5872// +------------+ +------------+ |
5873// | TRUNCATE | | TRUNCATE | |
5874// +------------+ +------------+ |
5875// | | |
5876// V V |
5877// +------------+ +------------+ |
5878// | ZERO_EXT | | ZERO_EXT | |
5879// +------------+ +------------+ |
5880// | | |
5881// | +-------------+ |
5882// V V | |
5883// +----------------+ | |
5884// | AND | | |
5885// +----------------+ | |
5886// | | |
5887// +---------------+ | |
5888// | | |
5889// V V V
5890// +-------------+
5891// | CMP |
5892// +-------------+
5893// * Case 2:
5894// +------------+ +------------+ +-------------+ +------------+ +------------+
5895// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5896// +------------+ +------------+ +-------------+ +------------+ +------------+
5897// | | | | |
5898// V | | | |
5899// +------------+ | | | |
5900// | XOR |<---------------------+ | |
5901// +------------+ | | |
5902// | | | |
5903// V V +---------------+ |
5904// +------------+ +------------+ | |
5905// | TRUNCATE | | TRUNCATE | | +-------------------------+
5906// +------------+ +------------+ | |
5907// | | | |
5908// V V | |
5909// +------------+ +------------+ | |
5910// | ZERO_EXT | | ZERO_EXT | | |
5911// +------------+ +------------+ | |
5912// | | | |
5913// V V | |
5914// +----------------+ | |
5915// | AND | | |
5916// +----------------+ | |
5917// | | |
5918// +---------------+ | |
5919// | | |
5920// V V V
5921// +-------------+
5922// | CMP |
5923// +-------------+
5926 const LoongArchSubtarget &Subtarget) {
5927 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5928
5929 SDNode *AndNode = N->getOperand(0).getNode();
5930 if (AndNode->getOpcode() != ISD::AND)
5931 return SDValue();
5932
5933 SDValue AndInputValue2 = AndNode->getOperand(1);
5934 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5935 return SDValue();
5936
5937 SDValue CmpInputValue = N->getOperand(1);
5938 SDValue AndInputValue1 = AndNode->getOperand(0);
5939 if (AndInputValue1.getOpcode() == ISD::XOR) {
5940 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5941 return SDValue();
5942 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5943 if (!CN || !CN->isAllOnes())
5944 return SDValue();
5945 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5946 if (!CN || !CN->isZero())
5947 return SDValue();
5948 AndInputValue1 = AndInputValue1.getOperand(0);
5949 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5950 return SDValue();
5951 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5952 if (AndInputValue2 != CmpInputValue)
5953 return SDValue();
5954 } else {
5955 return SDValue();
5956 }
5957
5958 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5959 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5960 return SDValue();
5961
5962 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5963 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5964 return SDValue();
5965
5966 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5967 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5968 ISD::LoadExtType ExtType1;
5969 ISD::LoadExtType ExtType2;
5970
5971 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5972 !checkValueWidth(TruncInputValue2, ExtType2))
5973 return SDValue();
5974
5975 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5976 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5977 return SDValue();
5978
5979 if ((ExtType2 != ISD::ZEXTLOAD) &&
5980 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5981 return SDValue();
5982
5983 // These truncation and zero-extension nodes are not necessary, remove them.
5984 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5985 TruncInputValue1, TruncInputValue2);
5986 SDValue NewSetCC =
5987 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5988 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5989 return SDValue(N, 0);
5990}
5991
5992// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5995 const LoongArchSubtarget &Subtarget) {
5996 if (DCI.isBeforeLegalizeOps())
5997 return SDValue();
5998
5999 SDValue Src = N->getOperand(0);
6000 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6001 return SDValue();
6002
6003 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6004 Src.getOperand(0));
6005}
6006
// Perform common combines for BR_CC and SELECT_CC conditions.
//
// LHS, RHS and CC are the condition operands of a br_cc/select_cc node; they
// are updated in place. Returns true if any of them was changed (so the
// caller should rebuild the node), false if nothing was combined.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // An arithmetic right shift preserves the sign bit, so when only the sign
  // of the result is compared against zero the shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // All remaining folds only apply to equality comparisons (eq/ne).
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
  // i.e. turn a single-bit test into a sign-bit test by shifting the tested
  // bit into the MSB.
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      // The mask must select exactly the bit that the srl brings to bit 0.
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        // ShAmt == 0 means the tested bit is already the sign bit.
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  return false;
}
6079
6082 const LoongArchSubtarget &Subtarget) {
6083 SDValue LHS = N->getOperand(1);
6084 SDValue RHS = N->getOperand(2);
6085 SDValue CC = N->getOperand(3);
6086 SDLoc DL(N);
6087
6088 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6089 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6090 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6091
6092 return SDValue();
6093}
6094
6097 const LoongArchSubtarget &Subtarget) {
6098 // Transform
6099 SDValue LHS = N->getOperand(0);
6100 SDValue RHS = N->getOperand(1);
6101 SDValue CC = N->getOperand(2);
6102 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6103 SDValue TrueV = N->getOperand(3);
6104 SDValue FalseV = N->getOperand(4);
6105 SDLoc DL(N);
6106 EVT VT = N->getValueType(0);
6107
6108 // If the True and False values are the same, we don't need a select_cc.
6109 if (TrueV == FalseV)
6110 return TrueV;
6111
6112 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6113 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6114 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6116 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6117 if (CCVal == ISD::CondCode::SETGE)
6118 std::swap(TrueV, FalseV);
6119
6120 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6121 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6122 // Only handle simm12, if it is not in this range, it can be considered as
6123 // register.
6124 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6125 isInt<12>(TrueSImm - FalseSImm)) {
6126 SDValue SRA =
6127 DAG.getNode(ISD::SRA, DL, VT, LHS,
6128 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6129 SDValue AND =
6130 DAG.getNode(ISD::AND, DL, VT, SRA,
6131 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6132 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6133 }
6134
6135 if (CCVal == ISD::CondCode::SETGE)
6136 std::swap(TrueV, FalseV);
6137 }
6138
6139 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6140 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6141 {LHS, RHS, CC, TrueV, FalseV});
6142
6143 return SDValue();
6144}
6145
6146template <unsigned N>
6148 SelectionDAG &DAG,
6149 const LoongArchSubtarget &Subtarget,
6150 bool IsSigned = false) {
6151 SDLoc DL(Node);
6152 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6153 // Check the ImmArg.
6154 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6155 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6156 DAG.getContext()->emitError(Node->getOperationName(0) +
6157 ": argument out of range.");
6158 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6159 }
6160 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6161}
6162
6163template <unsigned N>
6164static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6165 SelectionDAG &DAG, bool IsSigned = false) {
6166 SDLoc DL(Node);
6167 EVT ResTy = Node->getValueType(0);
6168 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6169
6170 // Check the ImmArg.
6171 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6172 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6173 DAG.getContext()->emitError(Node->getOperationName(0) +
6174 ": argument out of range.");
6175 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6176 }
6177 return DAG.getConstant(
6179 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6180 DL, ResTy);
6181}
6182
6184 SDLoc DL(Node);
6185 EVT ResTy = Node->getValueType(0);
6186 SDValue Vec = Node->getOperand(2);
6187 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6188 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6189}
6190
6192 SDLoc DL(Node);
6193 EVT ResTy = Node->getValueType(0);
6194 SDValue One = DAG.getConstant(1, DL, ResTy);
6195 SDValue Bit =
6196 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6197
6198 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6199 DAG.getNOT(DL, Bit, ResTy));
6200}
6201
6202template <unsigned N>
6204 SDLoc DL(Node);
6205 EVT ResTy = Node->getValueType(0);
6206 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6207 // Check the unsigned ImmArg.
6208 if (!isUInt<N>(CImm->getZExtValue())) {
6209 DAG.getContext()->emitError(Node->getOperationName(0) +
6210 ": argument out of range.");
6211 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6212 }
6213
6214 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6215 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6216
6217 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6218}
6219
6220template <unsigned N>
6222 SDLoc DL(Node);
6223 EVT ResTy = Node->getValueType(0);
6224 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6225 // Check the unsigned ImmArg.
6226 if (!isUInt<N>(CImm->getZExtValue())) {
6227 DAG.getContext()->emitError(Node->getOperationName(0) +
6228 ": argument out of range.");
6229 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6230 }
6231
6232 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6233 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6234 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6235}
6236
6237template <unsigned N>
6239 SDLoc DL(Node);
6240 EVT ResTy = Node->getValueType(0);
6241 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6242 // Check the unsigned ImmArg.
6243 if (!isUInt<N>(CImm->getZExtValue())) {
6244 DAG.getContext()->emitError(Node->getOperationName(0) +
6245 ": argument out of range.");
6246 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6247 }
6248
6249 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6250 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6251 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6252}
6253
6254template <unsigned W>
6256 unsigned ResOp) {
6257 unsigned Imm = N->getConstantOperandVal(2);
6258 if (!isUInt<W>(Imm)) {
6259 const StringRef ErrorMsg = "argument out of range";
6260 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6261 return DAG.getUNDEF(N->getValueType(0));
6262 }
6263 SDLoc DL(N);
6264 SDValue Vec = N->getOperand(1);
6265 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6267 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6268}
6269
6270static SDValue
6273 const LoongArchSubtarget &Subtarget) {
6274 SDLoc DL(N);
6275 switch (N->getConstantOperandVal(0)) {
6276 default:
6277 break;
6278 case Intrinsic::loongarch_lsx_vadd_b:
6279 case Intrinsic::loongarch_lsx_vadd_h:
6280 case Intrinsic::loongarch_lsx_vadd_w:
6281 case Intrinsic::loongarch_lsx_vadd_d:
6282 case Intrinsic::loongarch_lasx_xvadd_b:
6283 case Intrinsic::loongarch_lasx_xvadd_h:
6284 case Intrinsic::loongarch_lasx_xvadd_w:
6285 case Intrinsic::loongarch_lasx_xvadd_d:
6286 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6287 N->getOperand(2));
6288 case Intrinsic::loongarch_lsx_vaddi_bu:
6289 case Intrinsic::loongarch_lsx_vaddi_hu:
6290 case Intrinsic::loongarch_lsx_vaddi_wu:
6291 case Intrinsic::loongarch_lsx_vaddi_du:
6292 case Intrinsic::loongarch_lasx_xvaddi_bu:
6293 case Intrinsic::loongarch_lasx_xvaddi_hu:
6294 case Intrinsic::loongarch_lasx_xvaddi_wu:
6295 case Intrinsic::loongarch_lasx_xvaddi_du:
6296 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6297 lowerVectorSplatImm<5>(N, 2, DAG));
6298 case Intrinsic::loongarch_lsx_vsub_b:
6299 case Intrinsic::loongarch_lsx_vsub_h:
6300 case Intrinsic::loongarch_lsx_vsub_w:
6301 case Intrinsic::loongarch_lsx_vsub_d:
6302 case Intrinsic::loongarch_lasx_xvsub_b:
6303 case Intrinsic::loongarch_lasx_xvsub_h:
6304 case Intrinsic::loongarch_lasx_xvsub_w:
6305 case Intrinsic::loongarch_lasx_xvsub_d:
6306 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6307 N->getOperand(2));
6308 case Intrinsic::loongarch_lsx_vsubi_bu:
6309 case Intrinsic::loongarch_lsx_vsubi_hu:
6310 case Intrinsic::loongarch_lsx_vsubi_wu:
6311 case Intrinsic::loongarch_lsx_vsubi_du:
6312 case Intrinsic::loongarch_lasx_xvsubi_bu:
6313 case Intrinsic::loongarch_lasx_xvsubi_hu:
6314 case Intrinsic::loongarch_lasx_xvsubi_wu:
6315 case Intrinsic::loongarch_lasx_xvsubi_du:
6316 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6317 lowerVectorSplatImm<5>(N, 2, DAG));
6318 case Intrinsic::loongarch_lsx_vneg_b:
6319 case Intrinsic::loongarch_lsx_vneg_h:
6320 case Intrinsic::loongarch_lsx_vneg_w:
6321 case Intrinsic::loongarch_lsx_vneg_d:
6322 case Intrinsic::loongarch_lasx_xvneg_b:
6323 case Intrinsic::loongarch_lasx_xvneg_h:
6324 case Intrinsic::loongarch_lasx_xvneg_w:
6325 case Intrinsic::loongarch_lasx_xvneg_d:
6326 return DAG.getNode(
6327 ISD::SUB, DL, N->getValueType(0),
6328 DAG.getConstant(
6329 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6330 /*isSigned=*/true),
6331 SDLoc(N), N->getValueType(0)),
6332 N->getOperand(1));
6333 case Intrinsic::loongarch_lsx_vmax_b:
6334 case Intrinsic::loongarch_lsx_vmax_h:
6335 case Intrinsic::loongarch_lsx_vmax_w:
6336 case Intrinsic::loongarch_lsx_vmax_d:
6337 case Intrinsic::loongarch_lasx_xvmax_b:
6338 case Intrinsic::loongarch_lasx_xvmax_h:
6339 case Intrinsic::loongarch_lasx_xvmax_w:
6340 case Intrinsic::loongarch_lasx_xvmax_d:
6341 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6342 N->getOperand(2));
6343 case Intrinsic::loongarch_lsx_vmax_bu:
6344 case Intrinsic::loongarch_lsx_vmax_hu:
6345 case Intrinsic::loongarch_lsx_vmax_wu:
6346 case Intrinsic::loongarch_lsx_vmax_du:
6347 case Intrinsic::loongarch_lasx_xvmax_bu:
6348 case Intrinsic::loongarch_lasx_xvmax_hu:
6349 case Intrinsic::loongarch_lasx_xvmax_wu:
6350 case Intrinsic::loongarch_lasx_xvmax_du:
6351 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6352 N->getOperand(2));
6353 case Intrinsic::loongarch_lsx_vmaxi_b:
6354 case Intrinsic::loongarch_lsx_vmaxi_h:
6355 case Intrinsic::loongarch_lsx_vmaxi_w:
6356 case Intrinsic::loongarch_lsx_vmaxi_d:
6357 case Intrinsic::loongarch_lasx_xvmaxi_b:
6358 case Intrinsic::loongarch_lasx_xvmaxi_h:
6359 case Intrinsic::loongarch_lasx_xvmaxi_w:
6360 case Intrinsic::loongarch_lasx_xvmaxi_d:
6361 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6362 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6363 case Intrinsic::loongarch_lsx_vmaxi_bu:
6364 case Intrinsic::loongarch_lsx_vmaxi_hu:
6365 case Intrinsic::loongarch_lsx_vmaxi_wu:
6366 case Intrinsic::loongarch_lsx_vmaxi_du:
6367 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6368 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6369 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6370 case Intrinsic::loongarch_lasx_xvmaxi_du:
6371 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6372 lowerVectorSplatImm<5>(N, 2, DAG));
6373 case Intrinsic::loongarch_lsx_vmin_b:
6374 case Intrinsic::loongarch_lsx_vmin_h:
6375 case Intrinsic::loongarch_lsx_vmin_w:
6376 case Intrinsic::loongarch_lsx_vmin_d:
6377 case Intrinsic::loongarch_lasx_xvmin_b:
6378 case Intrinsic::loongarch_lasx_xvmin_h:
6379 case Intrinsic::loongarch_lasx_xvmin_w:
6380 case Intrinsic::loongarch_lasx_xvmin_d:
6381 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6382 N->getOperand(2));
6383 case Intrinsic::loongarch_lsx_vmin_bu:
6384 case Intrinsic::loongarch_lsx_vmin_hu:
6385 case Intrinsic::loongarch_lsx_vmin_wu:
6386 case Intrinsic::loongarch_lsx_vmin_du:
6387 case Intrinsic::loongarch_lasx_xvmin_bu:
6388 case Intrinsic::loongarch_lasx_xvmin_hu:
6389 case Intrinsic::loongarch_lasx_xvmin_wu:
6390 case Intrinsic::loongarch_lasx_xvmin_du:
6391 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6392 N->getOperand(2));
6393 case Intrinsic::loongarch_lsx_vmini_b:
6394 case Intrinsic::loongarch_lsx_vmini_h:
6395 case Intrinsic::loongarch_lsx_vmini_w:
6396 case Intrinsic::loongarch_lsx_vmini_d:
6397 case Intrinsic::loongarch_lasx_xvmini_b:
6398 case Intrinsic::loongarch_lasx_xvmini_h:
6399 case Intrinsic::loongarch_lasx_xvmini_w:
6400 case Intrinsic::loongarch_lasx_xvmini_d:
6401 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6402 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6403 case Intrinsic::loongarch_lsx_vmini_bu:
6404 case Intrinsic::loongarch_lsx_vmini_hu:
6405 case Intrinsic::loongarch_lsx_vmini_wu:
6406 case Intrinsic::loongarch_lsx_vmini_du:
6407 case Intrinsic::loongarch_lasx_xvmini_bu:
6408 case Intrinsic::loongarch_lasx_xvmini_hu:
6409 case Intrinsic::loongarch_lasx_xvmini_wu:
6410 case Intrinsic::loongarch_lasx_xvmini_du:
6411 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6412 lowerVectorSplatImm<5>(N, 2, DAG));
6413 case Intrinsic::loongarch_lsx_vmul_b:
6414 case Intrinsic::loongarch_lsx_vmul_h:
6415 case Intrinsic::loongarch_lsx_vmul_w:
6416 case Intrinsic::loongarch_lsx_vmul_d:
6417 case Intrinsic::loongarch_lasx_xvmul_b:
6418 case Intrinsic::loongarch_lasx_xvmul_h:
6419 case Intrinsic::loongarch_lasx_xvmul_w:
6420 case Intrinsic::loongarch_lasx_xvmul_d:
6421 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6422 N->getOperand(2));
6423 case Intrinsic::loongarch_lsx_vmadd_b:
6424 case Intrinsic::loongarch_lsx_vmadd_h:
6425 case Intrinsic::loongarch_lsx_vmadd_w:
6426 case Intrinsic::loongarch_lsx_vmadd_d:
6427 case Intrinsic::loongarch_lasx_xvmadd_b:
6428 case Intrinsic::loongarch_lasx_xvmadd_h:
6429 case Intrinsic::loongarch_lasx_xvmadd_w:
6430 case Intrinsic::loongarch_lasx_xvmadd_d: {
6431 EVT ResTy = N->getValueType(0);
6432 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6433 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6434 N->getOperand(3)));
6435 }
6436 case Intrinsic::loongarch_lsx_vmsub_b:
6437 case Intrinsic::loongarch_lsx_vmsub_h:
6438 case Intrinsic::loongarch_lsx_vmsub_w:
6439 case Intrinsic::loongarch_lsx_vmsub_d:
6440 case Intrinsic::loongarch_lasx_xvmsub_b:
6441 case Intrinsic::loongarch_lasx_xvmsub_h:
6442 case Intrinsic::loongarch_lasx_xvmsub_w:
6443 case Intrinsic::loongarch_lasx_xvmsub_d: {
6444 EVT ResTy = N->getValueType(0);
6445 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6446 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6447 N->getOperand(3)));
6448 }
6449 case Intrinsic::loongarch_lsx_vdiv_b:
6450 case Intrinsic::loongarch_lsx_vdiv_h:
6451 case Intrinsic::loongarch_lsx_vdiv_w:
6452 case Intrinsic::loongarch_lsx_vdiv_d:
6453 case Intrinsic::loongarch_lasx_xvdiv_b:
6454 case Intrinsic::loongarch_lasx_xvdiv_h:
6455 case Intrinsic::loongarch_lasx_xvdiv_w:
6456 case Intrinsic::loongarch_lasx_xvdiv_d:
6457 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6458 N->getOperand(2));
6459 case Intrinsic::loongarch_lsx_vdiv_bu:
6460 case Intrinsic::loongarch_lsx_vdiv_hu:
6461 case Intrinsic::loongarch_lsx_vdiv_wu:
6462 case Intrinsic::loongarch_lsx_vdiv_du:
6463 case Intrinsic::loongarch_lasx_xvdiv_bu:
6464 case Intrinsic::loongarch_lasx_xvdiv_hu:
6465 case Intrinsic::loongarch_lasx_xvdiv_wu:
6466 case Intrinsic::loongarch_lasx_xvdiv_du:
6467 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6468 N->getOperand(2));
6469 case Intrinsic::loongarch_lsx_vmod_b:
6470 case Intrinsic::loongarch_lsx_vmod_h:
6471 case Intrinsic::loongarch_lsx_vmod_w:
6472 case Intrinsic::loongarch_lsx_vmod_d:
6473 case Intrinsic::loongarch_lasx_xvmod_b:
6474 case Intrinsic::loongarch_lasx_xvmod_h:
6475 case Intrinsic::loongarch_lasx_xvmod_w:
6476 case Intrinsic::loongarch_lasx_xvmod_d:
6477 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6478 N->getOperand(2));
6479 case Intrinsic::loongarch_lsx_vmod_bu:
6480 case Intrinsic::loongarch_lsx_vmod_hu:
6481 case Intrinsic::loongarch_lsx_vmod_wu:
6482 case Intrinsic::loongarch_lsx_vmod_du:
6483 case Intrinsic::loongarch_lasx_xvmod_bu:
6484 case Intrinsic::loongarch_lasx_xvmod_hu:
6485 case Intrinsic::loongarch_lasx_xvmod_wu:
6486 case Intrinsic::loongarch_lasx_xvmod_du:
6487 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6488 N->getOperand(2));
6489 case Intrinsic::loongarch_lsx_vand_v:
6490 case Intrinsic::loongarch_lasx_xvand_v:
6491 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6492 N->getOperand(2));
6493 case Intrinsic::loongarch_lsx_vor_v:
6494 case Intrinsic::loongarch_lasx_xvor_v:
6495 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6496 N->getOperand(2));
6497 case Intrinsic::loongarch_lsx_vxor_v:
6498 case Intrinsic::loongarch_lasx_xvxor_v:
6499 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6500 N->getOperand(2));
6501 case Intrinsic::loongarch_lsx_vnor_v:
6502 case Intrinsic::loongarch_lasx_xvnor_v: {
6503 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6504 N->getOperand(2));
6505 return DAG.getNOT(DL, Res, Res->getValueType(0));
6506 }
6507 case Intrinsic::loongarch_lsx_vandi_b:
6508 case Intrinsic::loongarch_lasx_xvandi_b:
6509 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6510 lowerVectorSplatImm<8>(N, 2, DAG));
6511 case Intrinsic::loongarch_lsx_vori_b:
6512 case Intrinsic::loongarch_lasx_xvori_b:
6513 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6514 lowerVectorSplatImm<8>(N, 2, DAG));
6515 case Intrinsic::loongarch_lsx_vxori_b:
6516 case Intrinsic::loongarch_lasx_xvxori_b:
6517 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6518 lowerVectorSplatImm<8>(N, 2, DAG));
6519 case Intrinsic::loongarch_lsx_vsll_b:
6520 case Intrinsic::loongarch_lsx_vsll_h:
6521 case Intrinsic::loongarch_lsx_vsll_w:
6522 case Intrinsic::loongarch_lsx_vsll_d:
6523 case Intrinsic::loongarch_lasx_xvsll_b:
6524 case Intrinsic::loongarch_lasx_xvsll_h:
6525 case Intrinsic::loongarch_lasx_xvsll_w:
6526 case Intrinsic::loongarch_lasx_xvsll_d:
6527 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6528 truncateVecElts(N, DAG));
6529 case Intrinsic::loongarch_lsx_vslli_b:
6530 case Intrinsic::loongarch_lasx_xvslli_b:
6531 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6532 lowerVectorSplatImm<3>(N, 2, DAG));
6533 case Intrinsic::loongarch_lsx_vslli_h:
6534 case Intrinsic::loongarch_lasx_xvslli_h:
6535 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6536 lowerVectorSplatImm<4>(N, 2, DAG));
6537 case Intrinsic::loongarch_lsx_vslli_w:
6538 case Intrinsic::loongarch_lasx_xvslli_w:
6539 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6540 lowerVectorSplatImm<5>(N, 2, DAG));
6541 case Intrinsic::loongarch_lsx_vslli_d:
6542 case Intrinsic::loongarch_lasx_xvslli_d:
6543 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6544 lowerVectorSplatImm<6>(N, 2, DAG));
6545 case Intrinsic::loongarch_lsx_vsrl_b:
6546 case Intrinsic::loongarch_lsx_vsrl_h:
6547 case Intrinsic::loongarch_lsx_vsrl_w:
6548 case Intrinsic::loongarch_lsx_vsrl_d:
6549 case Intrinsic::loongarch_lasx_xvsrl_b:
6550 case Intrinsic::loongarch_lasx_xvsrl_h:
6551 case Intrinsic::loongarch_lasx_xvsrl_w:
6552 case Intrinsic::loongarch_lasx_xvsrl_d:
6553 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6554 truncateVecElts(N, DAG));
6555 case Intrinsic::loongarch_lsx_vsrli_b:
6556 case Intrinsic::loongarch_lasx_xvsrli_b:
6557 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6558 lowerVectorSplatImm<3>(N, 2, DAG));
6559 case Intrinsic::loongarch_lsx_vsrli_h:
6560 case Intrinsic::loongarch_lasx_xvsrli_h:
6561 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6562 lowerVectorSplatImm<4>(N, 2, DAG));
6563 case Intrinsic::loongarch_lsx_vsrli_w:
6564 case Intrinsic::loongarch_lasx_xvsrli_w:
6565 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6566 lowerVectorSplatImm<5>(N, 2, DAG));
6567 case Intrinsic::loongarch_lsx_vsrli_d:
6568 case Intrinsic::loongarch_lasx_xvsrli_d:
6569 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6570 lowerVectorSplatImm<6>(N, 2, DAG));
6571 case Intrinsic::loongarch_lsx_vsra_b:
6572 case Intrinsic::loongarch_lsx_vsra_h:
6573 case Intrinsic::loongarch_lsx_vsra_w:
6574 case Intrinsic::loongarch_lsx_vsra_d:
6575 case Intrinsic::loongarch_lasx_xvsra_b:
6576 case Intrinsic::loongarch_lasx_xvsra_h:
6577 case Intrinsic::loongarch_lasx_xvsra_w:
6578 case Intrinsic::loongarch_lasx_xvsra_d:
6579 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6580 truncateVecElts(N, DAG));
6581 case Intrinsic::loongarch_lsx_vsrai_b:
6582 case Intrinsic::loongarch_lasx_xvsrai_b:
6583 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6584 lowerVectorSplatImm<3>(N, 2, DAG));
6585 case Intrinsic::loongarch_lsx_vsrai_h:
6586 case Intrinsic::loongarch_lasx_xvsrai_h:
6587 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6588 lowerVectorSplatImm<4>(N, 2, DAG));
6589 case Intrinsic::loongarch_lsx_vsrai_w:
6590 case Intrinsic::loongarch_lasx_xvsrai_w:
6591 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6592 lowerVectorSplatImm<5>(N, 2, DAG));
6593 case Intrinsic::loongarch_lsx_vsrai_d:
6594 case Intrinsic::loongarch_lasx_xvsrai_d:
6595 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6596 lowerVectorSplatImm<6>(N, 2, DAG));
6597 case Intrinsic::loongarch_lsx_vclz_b:
6598 case Intrinsic::loongarch_lsx_vclz_h:
6599 case Intrinsic::loongarch_lsx_vclz_w:
6600 case Intrinsic::loongarch_lsx_vclz_d:
6601 case Intrinsic::loongarch_lasx_xvclz_b:
6602 case Intrinsic::loongarch_lasx_xvclz_h:
6603 case Intrinsic::loongarch_lasx_xvclz_w:
6604 case Intrinsic::loongarch_lasx_xvclz_d:
6605 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6606 case Intrinsic::loongarch_lsx_vpcnt_b:
6607 case Intrinsic::loongarch_lsx_vpcnt_h:
6608 case Intrinsic::loongarch_lsx_vpcnt_w:
6609 case Intrinsic::loongarch_lsx_vpcnt_d:
6610 case Intrinsic::loongarch_lasx_xvpcnt_b:
6611 case Intrinsic::loongarch_lasx_xvpcnt_h:
6612 case Intrinsic::loongarch_lasx_xvpcnt_w:
6613 case Intrinsic::loongarch_lasx_xvpcnt_d:
6614 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6615 case Intrinsic::loongarch_lsx_vbitclr_b:
6616 case Intrinsic::loongarch_lsx_vbitclr_h:
6617 case Intrinsic::loongarch_lsx_vbitclr_w:
6618 case Intrinsic::loongarch_lsx_vbitclr_d:
6619 case Intrinsic::loongarch_lasx_xvbitclr_b:
6620 case Intrinsic::loongarch_lasx_xvbitclr_h:
6621 case Intrinsic::loongarch_lasx_xvbitclr_w:
6622 case Intrinsic::loongarch_lasx_xvbitclr_d:
6623 return lowerVectorBitClear(N, DAG);
6624 case Intrinsic::loongarch_lsx_vbitclri_b:
6625 case Intrinsic::loongarch_lasx_xvbitclri_b:
6626 return lowerVectorBitClearImm<3>(N, DAG);
6627 case Intrinsic::loongarch_lsx_vbitclri_h:
6628 case Intrinsic::loongarch_lasx_xvbitclri_h:
6629 return lowerVectorBitClearImm<4>(N, DAG);
6630 case Intrinsic::loongarch_lsx_vbitclri_w:
6631 case Intrinsic::loongarch_lasx_xvbitclri_w:
6632 return lowerVectorBitClearImm<5>(N, DAG);
6633 case Intrinsic::loongarch_lsx_vbitclri_d:
6634 case Intrinsic::loongarch_lasx_xvbitclri_d:
6635 return lowerVectorBitClearImm<6>(N, DAG);
6636 case Intrinsic::loongarch_lsx_vbitset_b:
6637 case Intrinsic::loongarch_lsx_vbitset_h:
6638 case Intrinsic::loongarch_lsx_vbitset_w:
6639 case Intrinsic::loongarch_lsx_vbitset_d:
6640 case Intrinsic::loongarch_lasx_xvbitset_b:
6641 case Intrinsic::loongarch_lasx_xvbitset_h:
6642 case Intrinsic::loongarch_lasx_xvbitset_w:
6643 case Intrinsic::loongarch_lasx_xvbitset_d: {
6644 EVT VecTy = N->getValueType(0);
6645 SDValue One = DAG.getConstant(1, DL, VecTy);
6646 return DAG.getNode(
6647 ISD::OR, DL, VecTy, N->getOperand(1),
6648 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6649 }
6650 case Intrinsic::loongarch_lsx_vbitseti_b:
6651 case Intrinsic::loongarch_lasx_xvbitseti_b:
6652 return lowerVectorBitSetImm<3>(N, DAG);
6653 case Intrinsic::loongarch_lsx_vbitseti_h:
6654 case Intrinsic::loongarch_lasx_xvbitseti_h:
6655 return lowerVectorBitSetImm<4>(N, DAG);
6656 case Intrinsic::loongarch_lsx_vbitseti_w:
6657 case Intrinsic::loongarch_lasx_xvbitseti_w:
6658 return lowerVectorBitSetImm<5>(N, DAG);
6659 case Intrinsic::loongarch_lsx_vbitseti_d:
6660 case Intrinsic::loongarch_lasx_xvbitseti_d:
6661 return lowerVectorBitSetImm<6>(N, DAG);
6662 case Intrinsic::loongarch_lsx_vbitrev_b:
6663 case Intrinsic::loongarch_lsx_vbitrev_h:
6664 case Intrinsic::loongarch_lsx_vbitrev_w:
6665 case Intrinsic::loongarch_lsx_vbitrev_d:
6666 case Intrinsic::loongarch_lasx_xvbitrev_b:
6667 case Intrinsic::loongarch_lasx_xvbitrev_h:
6668 case Intrinsic::loongarch_lasx_xvbitrev_w:
6669 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6670 EVT VecTy = N->getValueType(0);
6671 SDValue One = DAG.getConstant(1, DL, VecTy);
6672 return DAG.getNode(
6673 ISD::XOR, DL, VecTy, N->getOperand(1),
6674 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6675 }
6676 case Intrinsic::loongarch_lsx_vbitrevi_b:
6677 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6678 return lowerVectorBitRevImm<3>(N, DAG);
6679 case Intrinsic::loongarch_lsx_vbitrevi_h:
6680 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6681 return lowerVectorBitRevImm<4>(N, DAG);
6682 case Intrinsic::loongarch_lsx_vbitrevi_w:
6683 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6684 return lowerVectorBitRevImm<5>(N, DAG);
6685 case Intrinsic::loongarch_lsx_vbitrevi_d:
6686 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6687 return lowerVectorBitRevImm<6>(N, DAG);
6688 case Intrinsic::loongarch_lsx_vfadd_s:
6689 case Intrinsic::loongarch_lsx_vfadd_d:
6690 case Intrinsic::loongarch_lasx_xvfadd_s:
6691 case Intrinsic::loongarch_lasx_xvfadd_d:
6692 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6693 N->getOperand(2));
6694 case Intrinsic::loongarch_lsx_vfsub_s:
6695 case Intrinsic::loongarch_lsx_vfsub_d:
6696 case Intrinsic::loongarch_lasx_xvfsub_s:
6697 case Intrinsic::loongarch_lasx_xvfsub_d:
6698 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6699 N->getOperand(2));
6700 case Intrinsic::loongarch_lsx_vfmul_s:
6701 case Intrinsic::loongarch_lsx_vfmul_d:
6702 case Intrinsic::loongarch_lasx_xvfmul_s:
6703 case Intrinsic::loongarch_lasx_xvfmul_d:
6704 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6705 N->getOperand(2));
6706 case Intrinsic::loongarch_lsx_vfdiv_s:
6707 case Intrinsic::loongarch_lsx_vfdiv_d:
6708 case Intrinsic::loongarch_lasx_xvfdiv_s:
6709 case Intrinsic::loongarch_lasx_xvfdiv_d:
6710 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6711 N->getOperand(2));
6712 case Intrinsic::loongarch_lsx_vfmadd_s:
6713 case Intrinsic::loongarch_lsx_vfmadd_d:
6714 case Intrinsic::loongarch_lasx_xvfmadd_s:
6715 case Intrinsic::loongarch_lasx_xvfmadd_d:
6716 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6717 N->getOperand(2), N->getOperand(3));
6718 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6719 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6720 N->getOperand(1), N->getOperand(2),
6721 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6722 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6723 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6724 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6725 N->getOperand(1), N->getOperand(2),
6726 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6727 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6728 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6729 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6730 N->getOperand(1), N->getOperand(2),
6731 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6732 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6733 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6734 N->getOperand(1), N->getOperand(2),
6735 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6736 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6737 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6738 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6739 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6740 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6741 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6742 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6743 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6744 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6745 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6746 N->getOperand(1)));
6747 case Intrinsic::loongarch_lsx_vreplve_b:
6748 case Intrinsic::loongarch_lsx_vreplve_h:
6749 case Intrinsic::loongarch_lsx_vreplve_w:
6750 case Intrinsic::loongarch_lsx_vreplve_d:
6751 case Intrinsic::loongarch_lasx_xvreplve_b:
6752 case Intrinsic::loongarch_lasx_xvreplve_h:
6753 case Intrinsic::loongarch_lasx_xvreplve_w:
6754 case Intrinsic::loongarch_lasx_xvreplve_d:
6755 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6756 N->getOperand(1),
6757 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6758 N->getOperand(2)));
6759 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6760 if (!Subtarget.is64Bit())
6761 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6762 break;
6763 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6764 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6765 if (!Subtarget.is64Bit())
6766 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6767 break;
6768 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6769 if (!Subtarget.is64Bit())
6770 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6771 break;
6772 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6773 if (!Subtarget.is64Bit())
6774 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6775 break;
6776 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6777 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6778 if (!Subtarget.is64Bit())
6779 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6780 break;
6781 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6782 if (!Subtarget.is64Bit())
6783 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6784 break;
6785 case Intrinsic::loongarch_lsx_bz_b:
6786 case Intrinsic::loongarch_lsx_bz_h:
6787 case Intrinsic::loongarch_lsx_bz_w:
6788 case Intrinsic::loongarch_lsx_bz_d:
6789 case Intrinsic::loongarch_lasx_xbz_b:
6790 case Intrinsic::loongarch_lasx_xbz_h:
6791 case Intrinsic::loongarch_lasx_xbz_w:
6792 case Intrinsic::loongarch_lasx_xbz_d:
6793 if (!Subtarget.is64Bit())
6794 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6795 N->getOperand(1));
6796 break;
6797 case Intrinsic::loongarch_lsx_bz_v:
6798 case Intrinsic::loongarch_lasx_xbz_v:
6799 if (!Subtarget.is64Bit())
6800 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6801 N->getOperand(1));
6802 break;
6803 case Intrinsic::loongarch_lsx_bnz_b:
6804 case Intrinsic::loongarch_lsx_bnz_h:
6805 case Intrinsic::loongarch_lsx_bnz_w:
6806 case Intrinsic::loongarch_lsx_bnz_d:
6807 case Intrinsic::loongarch_lasx_xbnz_b:
6808 case Intrinsic::loongarch_lasx_xbnz_h:
6809 case Intrinsic::loongarch_lasx_xbnz_w:
6810 case Intrinsic::loongarch_lasx_xbnz_d:
6811 if (!Subtarget.is64Bit())
6812 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6813 N->getOperand(1));
6814 break;
6815 case Intrinsic::loongarch_lsx_bnz_v:
6816 case Intrinsic::loongarch_lasx_xbnz_v:
6817 if (!Subtarget.is64Bit())
6818 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6819 N->getOperand(1));
6820 break;
6821 case Intrinsic::loongarch_lasx_concat_128_s:
6822 case Intrinsic::loongarch_lasx_concat_128_d:
6823 case Intrinsic::loongarch_lasx_concat_128:
6824 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6825 N->getOperand(1), N->getOperand(2));
6826 }
6827 return SDValue();
6828}
6829
6832 const LoongArchSubtarget &Subtarget) {
6833 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
6834 // conversion is unnecessary and can be replaced with the
6835 // MOVFR2GR_S_LA64 operand.
6836 SDValue Op0 = N->getOperand(0);
6837 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6838 return Op0.getOperand(0);
6839 return SDValue();
6840}
6841
6844 const LoongArchSubtarget &Subtarget) {
6845 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6846 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6847 // operand.
6848 SDValue Op0 = N->getOperand(0);
6849 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6850 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6851 "Unexpected value type!");
6852 return Op0.getOperand(0);
6853 }
6854 return SDValue();
6855}
6856
6859 const LoongArchSubtarget &Subtarget) {
6860 MVT VT = N->getSimpleValueType(0);
6861 unsigned NumBits = VT.getScalarSizeInBits();
6862
6863 // Simplify the inputs.
6864 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6865 APInt DemandedMask(APInt::getAllOnes(NumBits));
6866 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6867 return SDValue(N, 0);
6868
6869 return SDValue();
6870}
6871
6872static SDValue
6875 const LoongArchSubtarget &Subtarget) {
6876 SDValue Op0 = N->getOperand(0);
6877 SDLoc DL(N);
6878
6879 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6880 // redundant. Instead, use BuildPairF64's operands directly.
6881 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6882 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6883
6884 if (Op0->isUndef()) {
6885 SDValue Lo = DAG.getUNDEF(MVT::i32);
6886 SDValue Hi = DAG.getUNDEF(MVT::i32);
6887 return DCI.CombineTo(N, Lo, Hi);
6888 }
6889
6890 // It's cheaper to materialise two 32-bit integers than to load a double
6891 // from the constant pool and transfer it to integer registers through the
6892 // stack.
6894 APInt V = C->getValueAPF().bitcastToAPInt();
6895 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6896 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6897 return DCI.CombineTo(N, Lo, Hi);
6898 }
6899
6900 return SDValue();
6901}
6902
6903/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6906 const LoongArchSubtarget &Subtarget) {
6907 SDValue N0 = N->getOperand(0);
6908 SDValue N1 = N->getOperand(1);
6909 MVT VT = N->getSimpleValueType(0);
6910 SDLoc DL(N);
6911
6912 // VANDN(undef, x) -> 0
6913 // VANDN(x, undef) -> 0
6914 if (N0.isUndef() || N1.isUndef())
6915 return DAG.getConstant(0, DL, VT);
6916
6917 // VANDN(0, x) -> x
6919 return N1;
6920
6921 // VANDN(x, 0) -> 0
6923 return DAG.getConstant(0, DL, VT);
6924
6925 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6927 return DAG.getNOT(DL, N0, VT);
6928
6929 // Turn VANDN back to AND if input is inverted.
6930 if (SDValue Not = isNOT(N0, DAG))
6931 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6932
6933 // Folds for better commutativity:
6934 if (N1->hasOneUse()) {
6935 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6936 if (SDValue Not = isNOT(N1, DAG))
6937 return DAG.getNOT(
6938 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6939
6940 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6941 // -> NOT(OR(x, SplatVector(-Imm))
6942 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6943 // gain benefits.
6944 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6945 N1.getOpcode() == ISD::BUILD_VECTOR) {
6946 if (SDValue SplatValue =
6947 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
6948 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6949 return SDValue();
6950
6951 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
6952 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6953 SDValue Not =
6954 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6955 return DAG.getNOT(
6956 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6957 VT);
6958 }
6959 }
6960 }
6961 }
6962
6963 return SDValue();
6964}
6965
6968 const LoongArchSubtarget &Subtarget) {
6969 SDLoc DL(N);
6970 EVT VT = N->getValueType(0);
6971
6972 if (VT != MVT::f32 && VT != MVT::f64)
6973 return SDValue();
6974 if (VT == MVT::f32 && !Subtarget.hasBasicF())
6975 return SDValue();
6976 if (VT == MVT::f64 && !Subtarget.hasBasicD())
6977 return SDValue();
6978
6979 // Only optimize when the source and destination types have the same width.
6980 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
6981 return SDValue();
6982
6983 SDValue Src = N->getOperand(0);
6984 // If the result of an integer load is only used by an integer-to-float
6985 // conversion, use a fp load instead. This eliminates an integer-to-float-move
6986 // (movgr2fr) instruction.
6987 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
6988 // Do not change the width of a volatile load. This condition check is
6989 // inspired by AArch64.
6990 !cast<LoadSDNode>(Src)->isVolatile()) {
6991 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
6992 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
6993 LN0->getPointerInfo(), LN0->getAlign(),
6994 LN0->getMemOperand()->getFlags());
6995
6996 // Make sure successors of the original load stay after it by updating them
6997 // to use the new Chain.
6998 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
6999 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7000 }
7001
7002 return SDValue();
7003}
7004
7005// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7006// logical operations, like in the example below.
7007// or (and (truncate x, truncate y)),
7008// (xor (truncate z, build_vector (constants)))
7009// Given a target type \p VT, we generate
7010// or (and x, y), (xor z, zext(build_vector (constants)))
7011// given x, y and z are of type \p VT. We can do so, if operands are either
7012// truncates from VT types, the second operand is a vector of constants, can
7013// be recursively promoted or is an existing extension we can extend further.
7015 SelectionDAG &DAG,
7016 const LoongArchSubtarget &Subtarget,
7017 unsigned Depth) {
7018 // Limit recursion to avoid excessive compile times.
7020 return SDValue();
7021
7022 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7023 return SDValue();
7024
7025 SDValue N0 = N.getOperand(0);
7026 SDValue N1 = N.getOperand(1);
7027
7028 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7029 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7030 return SDValue();
7031
7032 if (SDValue NN0 =
7033 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7034 N0 = NN0;
7035 else {
7036 // The left side has to be a 'trunc'.
7037 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7038 N0.getOperand(0).getValueType() == VT;
7039 if (LHSTrunc)
7040 N0 = N0.getOperand(0);
7041 else
7042 return SDValue();
7043 }
7044
7045 if (SDValue NN1 =
7046 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7047 N1 = NN1;
7048 else {
7049 // The right side has to be a 'trunc', a (foldable) constant or an
7050 // existing extension we can extend further.
7051 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7052 N1.getOperand(0).getValueType() == VT;
7053 if (RHSTrunc)
7054 N1 = N1.getOperand(0);
7055 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7056 Subtarget.hasExtLASX() && N1.hasOneUse())
7057 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7058 // On 32-bit platform, i64 is an illegal integer scalar type, and
7059 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7060 // future.
7061 else if (SDValue Cst =
7063 N1 = Cst;
7064 else
7065 return SDValue();
7066 }
7067
7068 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7069}
7070
7071// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7072// is LSX-sized register. In most cases we actually compare or select LASX-sized
7073// registers and mixing the two types creates horrible code. This method
7074// optimizes some of the transition sequences.
7076 SelectionDAG &DAG,
7077 const LoongArchSubtarget &Subtarget) {
7078 EVT VT = N.getValueType();
7079 assert(VT.isVector() && "Expected vector type");
7080 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7081 N.getOpcode() == ISD::ZERO_EXTEND ||
7082 N.getOpcode() == ISD::SIGN_EXTEND) &&
7083 "Invalid Node");
7084
7085 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7086 return SDValue();
7087
7088 SDValue Narrow = N.getOperand(0);
7089 EVT NarrowVT = Narrow.getValueType();
7090
7091 // Generate the wide operation.
7092 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7093 if (!Op)
7094 return SDValue();
7095 switch (N.getOpcode()) {
7096 default:
7097 llvm_unreachable("Unexpected opcode");
7098 case ISD::ANY_EXTEND:
7099 return Op;
7100 case ISD::ZERO_EXTEND:
7101 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7102 case ISD::SIGN_EXTEND:
7103 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7104 DAG.getValueType(NarrowVT));
7105 }
7106}
7107
7110 const LoongArchSubtarget &Subtarget) {
7111 EVT VT = N->getValueType(0);
7112 SDLoc DL(N);
7113
7114 if (VT.isVector())
7115 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7116 return R;
7117
7118 return SDValue();
7119}
7120
7122 DAGCombinerInfo &DCI) const {
7123 SelectionDAG &DAG = DCI.DAG;
7124 switch (N->getOpcode()) {
7125 default:
7126 break;
7127 case ISD::AND:
7128 return performANDCombine(N, DAG, DCI, Subtarget);
7129 case ISD::OR:
7130 return performORCombine(N, DAG, DCI, Subtarget);
7131 case ISD::SETCC:
7132 return performSETCCCombine(N, DAG, DCI, Subtarget);
7133 case ISD::SRL:
7134 return performSRLCombine(N, DAG, DCI, Subtarget);
7135 case ISD::BITCAST:
7136 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7137 case ISD::ANY_EXTEND:
7138 case ISD::ZERO_EXTEND:
7139 case ISD::SIGN_EXTEND:
7140 return performEXTENDCombine(N, DAG, DCI, Subtarget);
7141 case ISD::SINT_TO_FP:
7142 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7143 case LoongArchISD::BITREV_W:
7144 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7145 case LoongArchISD::BR_CC:
7146 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7147 case LoongArchISD::SELECT_CC:
7148 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7150 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7151 case LoongArchISD::MOVGR2FR_W_LA64:
7152 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7153 case LoongArchISD::MOVFR2GR_S_LA64:
7154 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7155 case LoongArchISD::VMSKLTZ:
7156 case LoongArchISD::XVMSKLTZ:
7157 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7158 case LoongArchISD::SPLIT_PAIR_F64:
7159 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7160 case LoongArchISD::VANDN:
7161 return performVANDNCombine(N, DAG, DCI, Subtarget);
7162 }
7163 return SDValue();
7164}
7165
7168 if (!ZeroDivCheck)
7169 return MBB;
7170
7171 // Build instructions:
7172 // MBB:
7173 // div(or mod) $dst, $dividend, $divisor
7174 // bne $divisor, $zero, SinkMBB
7175 // BreakMBB:
7176 // break 7 // BRK_DIVZERO
7177 // SinkMBB:
7178 // fallthrough
7179 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7180 MachineFunction::iterator It = ++MBB->getIterator();
7181 MachineFunction *MF = MBB->getParent();
7182 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7183 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7184 MF->insert(It, BreakMBB);
7185 MF->insert(It, SinkMBB);
7186
7187 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7188 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7189 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7190
7191 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7192 DebugLoc DL = MI.getDebugLoc();
7193 MachineOperand &Divisor = MI.getOperand(2);
7194 Register DivisorReg = Divisor.getReg();
7195
7196 // MBB:
7197 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7198 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7199 .addReg(LoongArch::R0)
7200 .addMBB(SinkMBB);
7201 MBB->addSuccessor(BreakMBB);
7202 MBB->addSuccessor(SinkMBB);
7203
7204 // BreakMBB:
7205 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7206 // definition of BRK_DIVZERO.
7207 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7208 BreakMBB->addSuccessor(SinkMBB);
7209
7210 // Clear Divisor's kill flag.
7211 Divisor.setIsKill(false);
7212
7213 return SinkMBB;
7214}
7215
7216static MachineBasicBlock *
7218 const LoongArchSubtarget &Subtarget) {
7219 unsigned CondOpc;
7220 switch (MI.getOpcode()) {
7221 default:
7222 llvm_unreachable("Unexpected opcode");
7223 case LoongArch::PseudoVBZ:
7224 CondOpc = LoongArch::VSETEQZ_V;
7225 break;
7226 case LoongArch::PseudoVBZ_B:
7227 CondOpc = LoongArch::VSETANYEQZ_B;
7228 break;
7229 case LoongArch::PseudoVBZ_H:
7230 CondOpc = LoongArch::VSETANYEQZ_H;
7231 break;
7232 case LoongArch::PseudoVBZ_W:
7233 CondOpc = LoongArch::VSETANYEQZ_W;
7234 break;
7235 case LoongArch::PseudoVBZ_D:
7236 CondOpc = LoongArch::VSETANYEQZ_D;
7237 break;
7238 case LoongArch::PseudoVBNZ:
7239 CondOpc = LoongArch::VSETNEZ_V;
7240 break;
7241 case LoongArch::PseudoVBNZ_B:
7242 CondOpc = LoongArch::VSETALLNEZ_B;
7243 break;
7244 case LoongArch::PseudoVBNZ_H:
7245 CondOpc = LoongArch::VSETALLNEZ_H;
7246 break;
7247 case LoongArch::PseudoVBNZ_W:
7248 CondOpc = LoongArch::VSETALLNEZ_W;
7249 break;
7250 case LoongArch::PseudoVBNZ_D:
7251 CondOpc = LoongArch::VSETALLNEZ_D;
7252 break;
7253 case LoongArch::PseudoXVBZ:
7254 CondOpc = LoongArch::XVSETEQZ_V;
7255 break;
7256 case LoongArch::PseudoXVBZ_B:
7257 CondOpc = LoongArch::XVSETANYEQZ_B;
7258 break;
7259 case LoongArch::PseudoXVBZ_H:
7260 CondOpc = LoongArch::XVSETANYEQZ_H;
7261 break;
7262 case LoongArch::PseudoXVBZ_W:
7263 CondOpc = LoongArch::XVSETANYEQZ_W;
7264 break;
7265 case LoongArch::PseudoXVBZ_D:
7266 CondOpc = LoongArch::XVSETANYEQZ_D;
7267 break;
7268 case LoongArch::PseudoXVBNZ:
7269 CondOpc = LoongArch::XVSETNEZ_V;
7270 break;
7271 case LoongArch::PseudoXVBNZ_B:
7272 CondOpc = LoongArch::XVSETALLNEZ_B;
7273 break;
7274 case LoongArch::PseudoXVBNZ_H:
7275 CondOpc = LoongArch::XVSETALLNEZ_H;
7276 break;
7277 case LoongArch::PseudoXVBNZ_W:
7278 CondOpc = LoongArch::XVSETALLNEZ_W;
7279 break;
7280 case LoongArch::PseudoXVBNZ_D:
7281 CondOpc = LoongArch::XVSETALLNEZ_D;
7282 break;
7283 }
7284
7285 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7286 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7287 DebugLoc DL = MI.getDebugLoc();
7290
7291 MachineFunction *F = BB->getParent();
7292 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7293 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7294 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7295
7296 F->insert(It, FalseBB);
7297 F->insert(It, TrueBB);
7298 F->insert(It, SinkBB);
7299
7300 // Transfer the remainder of MBB and its successor edges to Sink.
7301 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7303
7304 // Insert the real instruction to BB.
7305 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7306 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7307
7308 // Insert branch.
7309 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7310 BB->addSuccessor(FalseBB);
7311 BB->addSuccessor(TrueBB);
7312
7313 // FalseBB.
7314 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7315 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7316 .addReg(LoongArch::R0)
7317 .addImm(0);
7318 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7319 FalseBB->addSuccessor(SinkBB);
7320
7321 // TrueBB.
7322 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7323 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7324 .addReg(LoongArch::R0)
7325 .addImm(1);
7326 TrueBB->addSuccessor(SinkBB);
7327
7328 // SinkBB: merge the results.
7329 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7330 MI.getOperand(0).getReg())
7331 .addReg(RD1)
7332 .addMBB(FalseBB)
7333 .addReg(RD2)
7334 .addMBB(TrueBB);
7335
7336 // The pseudo instruction is gone now.
7337 MI.eraseFromParent();
7338 return SinkBB;
7339}
7340
// Custom inserter for PseudoXVINSGR2VR_B/_H: insert a GPR element `Elt` into
// 256-bit LASX vector `XSrc` at immediate lane `Idx`, defining `XDst`.
// Fast path: if XSrc is an IMPLICIT_DEF and the lane is in the low 128-bit
// half, a single LSX VINSGR2VR on the subregister suffices. Otherwise the
// element is broadcast and merged back with XVPERMI_Q + XVEXTRINS.
// NOTE(review): the extraction dropped original line 7342 carrying the
// function name and the MachineInstr/MachineBasicBlock parameters - confirm
// against upstream source.
7341 static MachineBasicBlock *
7343 const LoongArchSubtarget &Subtarget) {
7344 unsigned InsOp;
7345 unsigned BroadcastOp;
// HalfSize = number of elements in one 128-bit half for this element width.
7346 unsigned HalfSize;
7347 switch (MI.getOpcode()) {
7348 default:
7349 llvm_unreachable("Unexpected opcode");
7350 case LoongArch::PseudoXVINSGR2VR_B:
7351 HalfSize = 16;
7352 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7353 InsOp = LoongArch::XVEXTRINS_B;
7354 break;
7355 case LoongArch::PseudoXVINSGR2VR_H:
7356 HalfSize = 8;
7357 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7358 InsOp = LoongArch::XVEXTRINS_H;
7359 break;
7360 }
7361 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7362 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7363 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7364 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original line 7365 (presumably the MachineRegisterInfo &MRI
// declaration used below) is missing from this extraction.
7366 // XDst = vector_insert XSrc, Elt, Idx
7367 Register XDst = MI.getOperand(0).getReg();
7368 Register XSrc = MI.getOperand(1).getReg();
7369 Register Elt = MI.getOperand(2).getReg();
7370 unsigned Idx = MI.getOperand(3).getImm();
7371
// Cheap path: source vector is undefined and the target lane is in the low
// 128 bits, so the insert can be done entirely on the LSX subregister.
7372 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7373 Idx < HalfSize) {
7374 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7375 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7376
7377 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7378 .addReg(XSrc, {}, LoongArch::sub_128);
7379 BuildMI(*BB, MI, DL,
7380 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7381 : LoongArch::VINSGR2VR_B),
7382 ScratchSubReg2)
7383 .addReg(ScratchSubReg1)
7384 .addReg(Elt)
7385 .addImm(Idx);
7386
// Widen the 128-bit result back to the 256-bit class; the high half is undef.
7387 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7388 .addReg(ScratchSubReg2)
7389 .addImm(LoongArch::sub_128);
7390 } else {
7391 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7392 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7393
// Broadcast the scalar into every lane, then line the relevant half up with
// the target half (immediates 48/18 select which halves XVPERMI_Q combines),
// and finally splice the single lane in with XVEXTRINS.
7394 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7395
7396 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7397 .addReg(ScratchReg1)
7398 .addReg(XSrc)
7399 .addImm(Idx >= HalfSize ? 48 : 18);
7400
// XVEXTRINS immediate encodes dest lane in the high nibble and source lane in
// the low nibble; lane*17 sets both nibbles to the same (half-relative) lane.
7401 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7402 .addReg(XSrc)
7403 .addReg(ScratchReg2)
7404 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7405 }
7406
7407 MI.eraseFromParent();
7408 return BB;
7409}
7410
// Custom inserter for PseudoCTPOP: population count of a GPR computed through
// the LSX unit - zero a vector, insert the scalar into lane 0, run VPCNT on
// the full element, and extract lane 0 back to the destination GPR. Element
// width follows GRLen (D on LA64, W on LA32).
// NOTE(review): original lines 7411-7412 (the `static MachineBasicBlock *`
// signature with the function name and MI/BB parameters) are missing from
// this extraction - confirm against upstream source.
7413 const LoongArchSubtarget &Subtarget) {
7414 assert(Subtarget.hasExtLSX());
7415 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7416 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7417 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original line 7418 (presumably the MachineRegisterInfo &MRI
// declaration used below) is missing from this extraction.
7419 Register Dst = MI.getOperand(0).getReg();
7420 Register Src = MI.getOperand(1).getReg();
7421 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7422 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7423 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7424
// VLDI 0 materializes an all-zero vector so the unused lanes contribute
// nothing to the popcount.
7425 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7426 BuildMI(*BB, MI, DL,
7427 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7428 : LoongArch::VINSGR2VR_W),
7429 ScratchReg2)
7430 .addReg(ScratchReg1)
7431 .addReg(Src)
7432 .addImm(0)
7433 BuildMI(
7434 *BB, MI, DL,
7435 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7436 ScratchReg3)
7437 .addReg(ScratchReg2);
7438 BuildMI(*BB, MI, DL,
7439 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7440 : LoongArch::VPICKVE2GR_W),
7441 Dst)
7442 .addReg(ScratchReg3)
7443 .addImm(0);
7444
7445 MI.eraseFromParent();
7446 return BB;
7447}
7448
// Custom inserter for the Pseudo[X]VMSK* family: compute a per-element
// condition mask (LTZ/GEZ/EQZ/NEZ) over an LSX/LASX vector and move the
// resulting bitmask into a GPR. EQZ has no native msk instruction, so it is
// synthesized as NZ followed by a vector NOR (bitwise not).
// NOTE(review): the extraction dropped original line 7450 carrying the
// function name and the MachineInstr/MachineBasicBlock parameters - confirm
// against upstream source.
7449 static MachineBasicBlock *
7451 const LoongArchSubtarget &Subtarget) {
7452 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7453 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7454 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
// NOTE(review): original line 7455 (presumably the MachineRegisterInfo &MRI
// declaration used below) is missing from this extraction.
7456 Register Dst = MI.getOperand(0).getReg();
7457 Register Src = MI.getOperand(1).getReg();
7458 DebugLoc DL = MI.getDebugLoc();
// EleBits: element width in bits; determines how many mask bits each 128-bit
// half produces. NotOpc is nonzero only when the mask must be inverted (EQZ).
7459 unsigned EleBits = 8;
7460 unsigned NotOpc = 0;
7461 unsigned MskOpc;
7462
7463 switch (MI.getOpcode()) {
7464 default:
7465 llvm_unreachable("Unexpected opcode");
7466 case LoongArch::PseudoVMSKLTZ_B:
7467 MskOpc = LoongArch::VMSKLTZ_B;
7468 break;
7469 case LoongArch::PseudoVMSKLTZ_H:
7470 MskOpc = LoongArch::VMSKLTZ_H;
7471 EleBits = 16;
7472 break;
7473 case LoongArch::PseudoVMSKLTZ_W:
7474 MskOpc = LoongArch::VMSKLTZ_W;
7475 EleBits = 32;
7476 break;
7477 case LoongArch::PseudoVMSKLTZ_D:
7478 MskOpc = LoongArch::VMSKLTZ_D;
7479 EleBits = 64;
7480 break;
7481 case LoongArch::PseudoVMSKGEZ_B:
7482 MskOpc = LoongArch::VMSKGEZ_B;
7483 break;
7484 case LoongArch::PseudoVMSKEQZ_B:
7485 MskOpc = LoongArch::VMSKNZ_B;
7486 NotOpc = LoongArch::VNOR_V;
7487 break;
7488 case LoongArch::PseudoVMSKNEZ_B:
7489 MskOpc = LoongArch::VMSKNZ_B;
7490 break;
7491 case LoongArch::PseudoXVMSKLTZ_B:
7492 MskOpc = LoongArch::XVMSKLTZ_B;
7493 RC = &LoongArch::LASX256RegClass;
7494 break;
7495 case LoongArch::PseudoXVMSKLTZ_H:
7496 MskOpc = LoongArch::XVMSKLTZ_H;
7497 RC = &LoongArch::LASX256RegClass;
7498 EleBits = 16;
7499 break;
7500 case LoongArch::PseudoXVMSKLTZ_W:
7501 MskOpc = LoongArch::XVMSKLTZ_W;
7502 RC = &LoongArch::LASX256RegClass;
7503 EleBits = 32;
7504 break;
7505 case LoongArch::PseudoXVMSKLTZ_D:
7506 MskOpc = LoongArch::XVMSKLTZ_D;
7507 RC = &LoongArch::LASX256RegClass;
7508 EleBits = 64;
7509 break;
7510 case LoongArch::PseudoXVMSKGEZ_B:
7511 MskOpc = LoongArch::XVMSKGEZ_B;
7512 RC = &LoongArch::LASX256RegClass;
7513 break;
7514 case LoongArch::PseudoXVMSKEQZ_B:
7515 MskOpc = LoongArch::XVMSKNZ_B;
7516 NotOpc = LoongArch::XVNOR_V;
7517 RC = &LoongArch::LASX256RegClass;
7518 break;
7519 case LoongArch::PseudoXVMSKNEZ_B:
7520 MskOpc = LoongArch::XVMSKNZ_B;
7521 RC = &LoongArch::LASX256RegClass;
7522 break;
7523 }
7524
7525 Register Msk = MRI.createVirtualRegister(RC);
7526 if (NotOpc) {
// EQZ = NOT(NZ): NOR of the NZ mask with itself inverts every bit.
7527 Register Tmp = MRI.createVirtualRegister(RC);
7528 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7529 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7530 .addReg(Tmp, RegState::Kill)
7531 .addReg(Tmp, RegState::Kill);
7532 } else {
7533 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7534 }
7535
// LASX (256-bit) masks live per 128-bit half: pick both word halves and
// concatenate them in the GPR with BSTRINS.
7536 if (TRI->getRegSizeInBits(*RC) > 128) {
7537 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7538 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7539 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7540 .addReg(Msk)
7541 .addImm(0);
7542 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7543 .addReg(Msk, RegState::Kill)
7544 .addImm(4);
7545 BuildMI(*BB, MI, DL,
7546 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7547 : LoongArch::BSTRINS_W),
7548 Dst)
// NOTE(review): original lines 7549-7550 (presumably `.addReg(Lo)` and
// `.addReg(Hi, ...)` operands of the BSTRINS) are missing here.
7551 .addImm(256 / EleBits - 1)
7552 .addImm(128 / EleBits);
7553 } else {
7554 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7555 .addReg(Msk, RegState::Kill)
7556 .addImm(0);
7557 }
7558
7559 MI.eraseFromParent();
7560 return BB;
7561}
7562
// Custom inserter for SplitPairF64Pseudo (LA32 with 64-bit FPRs): split an
// f64 in an FPR into two i32 GPR halves - MOVFR2GR_S_64 extracts the low
// word, MOVFRH2GR_S the high word.
// NOTE(review): the extraction dropped original line 7564 carrying the
// function name and the MachineInstr/MachineBasicBlock parameters.
7563 static MachineBasicBlock *
7565 const LoongArchSubtarget &Subtarget) {
7566 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7567 "Unexpected instruction");
7568
7569 MachineFunction &MF = *BB->getParent();
7570 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original line 7571 (presumably the TargetInstrInfo &TII
// declaration used below) is missing from this extraction.
7572 Register LoReg = MI.getOperand(0).getReg();
7573 Register HiReg = MI.getOperand(1).getReg();
7574 Register SrcReg = MI.getOperand(2).getReg();
7575
// Propagate the source's kill flag only on the last use of SrcReg.
7576 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7577 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7578 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7579 MI.eraseFromParent(); // The pseudo instruction is gone now.
7580 return BB;
7581}
7582
// Custom inserter for BuildPairF64Pseudo (LA32 with 64-bit FPRs): assemble an
// f64 FPR from two i32 GPR halves - MOVGR2FR_W_64 writes the low word into a
// temporary, MOVGR2FRH_W then inserts the high word to produce the result.
// NOTE(review): the extraction dropped original line 7584 carrying the
// function name and the MachineInstr/MachineBasicBlock parameters.
7583 static MachineBasicBlock *
7585 const LoongArchSubtarget &Subtarget) {
7586 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7587 "Unexpected instruction");
7588
7589 MachineFunction &MF = *BB->getParent();
7590 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original lines 7591-7592 (presumably the TII and MRI
// declarations used below) are missing from this extraction.
7593 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7594 Register DstReg = MI.getOperand(0).getReg();
7595 Register LoReg = MI.getOperand(1).getReg();
7596 Register HiReg = MI.getOperand(2).getReg();
7597
7598 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7599 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7600 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7601 .addReg(TmpReg, RegState::Kill)
7602 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7603 MI.eraseFromParent(); // The pseudo instruction is gone now.
7604 return BB;
7605}
7606
// Returns true if MI is a select pseudo that emitSelectPseudo knows how to
// expand (currently only Select_GPR_Using_CC_GPR).
// NOTE(review): original line 7607 (the `static bool isSelectPseudo(...)`
// signature) is missing from this extraction.
7608 switch (MI.getOpcode()) {
7609 default:
7610 return false;
7611 case LoongArch::Select_GPR_Using_CC_GPR:
7612 return true;
7613 }
7614}
7615
// Custom inserter for Select_GPR_Using_CC_GPR: expands select pseudos into a
// triangle CFG (HeadMBB -> IfFalseMBB -> TailMBB) with a conditional branch
// and PHIs, batching consecutive selects that share the same condition so one
// branch serves all of them.
// NOTE(review): the extraction dropped original line 7617 carrying the
// function name and the MachineInstr/MachineBasicBlock parameters.
7616 static MachineBasicBlock *
7618 const LoongArchSubtarget &Subtarget) {
7619 // To "insert" Select_* instructions, we actually have to insert the triangle
7620 // control-flow pattern. The incoming instructions know the destination vreg
7621 // to set, the condition code register to branch on, the true/false values to
7622 // select between, and the condcode to use to select the appropriate branch.
7623 //
7624 // We produce the following control flow:
7625 // HeadMBB
7626 // | \
7627 // | IfFalseMBB
7628 // | /
7629 // TailMBB
7630 //
7631 // When we find a sequence of selects we attempt to optimize their emission
7632 // by sharing the control flow. Currently we only handle cases where we have
7633 // multiple selects with the exact same condition (same LHS, RHS and CC).
7634 // The selects may be interleaved with other instructions if the other
7635 // instructions meet some requirements we deem safe:
7636 // - They are not pseudo instructions.
7637 // - They are debug instructions. Otherwise,
7638 // - They do not have side-effects, do not access memory and their inputs do
7639 // not depend on the results of the select pseudo-instructions.
7640 // The TrueV/FalseV operands of the selects cannot depend on the result of
7641 // previous selects in the sequence.
7642 // These conditions could be further relaxed. See the X86 target for a
7643 // related approach and more information.
7644
// RHS stays invalid when operand 2 is an immediate (compare-with-imm form).
7645 Register LHS = MI.getOperand(1).getReg();
7646 Register RHS;
7647 if (MI.getOperand(2).isReg())
7648 RHS = MI.getOperand(2).getReg();
// CC here is the branch opcode to emit, stored as an immediate operand.
7649 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7650
7651 SmallVector<MachineInstr *, 4> SelectDebugValues;
7652 SmallSet<Register, 4> SelectDests;
7653 SelectDests.insert(MI.getOperand(0).getReg());
7654
// Scan forward from MI collecting every select pseudo that shares the exact
// same condition, stopping at the first instruction that is unsafe to hoist
// past (side effects, memory access, custom insertion, or a use of a select
// result).
7655 MachineInstr *LastSelectPseudo = &MI;
7656 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7657 SequenceMBBI != E; ++SequenceMBBI) {
7658 if (SequenceMBBI->isDebugInstr())
7659 continue;
7660 if (isSelectPseudo(*SequenceMBBI)) {
7661 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7662 !SequenceMBBI->getOperand(2).isReg() ||
7663 SequenceMBBI->getOperand(2).getReg() != RHS ||
7664 SequenceMBBI->getOperand(3).getImm() != CC ||
7665 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7666 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7667 break;
7668 LastSelectPseudo = &*SequenceMBBI;
7669 SequenceMBBI->collectDebugValues(SelectDebugValues);
7670 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7671 continue;
7672 }
7673 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7674 SequenceMBBI->mayLoadOrStore() ||
7675 SequenceMBBI->usesCustomInsertionHook())
7676 break;
7677 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7678 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7679 }))
7680 break;
7681 }
7682
7683 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7684 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7685 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original line 7686 (presumably the insertion-point iterator
// `I` used by F->insert below) is missing from this extraction.
7687
7688 MachineBasicBlock *HeadMBB = BB;
7689 MachineFunction *F = BB->getParent();
7690 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7691 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7692
7693 F->insert(I, IfFalseMBB);
7694 F->insert(I, TailMBB);
7695
7696 // Set the call frame size on entry to the new basic blocks.
7697 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7698 IfFalseMBB->setCallFrameSize(CallFrameSize);
7699 TailMBB->setCallFrameSize(CallFrameSize);
7700
7701 // Transfer debug instructions associated with the selects to TailMBB.
7702 for (MachineInstr *DebugInstr : SelectDebugValues) {
7703 TailMBB->push_back(DebugInstr->removeFromParent());
7704 }
7705
7706 // Move all instructions after the sequence to TailMBB.
7707 TailMBB->splice(TailMBB->end(), HeadMBB,
7708 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7709 // Update machine-CFG edges by transferring all successors of the current
7710 // block to the new block which will contain the Phi nodes for the selects.
7711 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7712 // Set the successors for HeadMBB.
7713 HeadMBB->addSuccessor(IfFalseMBB);
7714 HeadMBB->addSuccessor(TailMBB);
7715
7716 // Insert appropriate branch.
// The branch jumps to TailMBB when the condition holds (select takes the
// true value); falling through to IfFalseMBB covers the false value.
7717 if (MI.getOperand(2).isImm())
7718 BuildMI(HeadMBB, DL, TII.get(CC))
7719 .addReg(LHS)
7720 .addImm(MI.getOperand(2).getImm())
7721 .addMBB(TailMBB)
7722 else
7723 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7724
7725 // IfFalseMBB just falls through to TailMBB.
7726 IfFalseMBB->addSuccessor(TailMBB);
7727
7728 // Create PHIs for all of the select pseudo-instructions.
7729 auto SelectMBBI = MI.getIterator();
7730 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7731 auto InsertionPoint = TailMBB->begin();
7732 while (SelectMBBI != SelectEnd) {
7733 auto Next = std::next(SelectMBBI);
7734 if (isSelectPseudo(*SelectMBBI)) {
7735 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7736 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7737 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7738 .addReg(SelectMBBI->getOperand(4).getReg())
7739 .addMBB(HeadMBB)
7740 .addReg(SelectMBBI->getOperand(5).getReg())
7741 .addMBB(IfFalseMBB);
7742 SelectMBBI->eraseFromParent();
7743 }
7744 SelectMBBI = Next;
7745 }
7746
// New PHIs were introduced, so the function no longer satisfies NoPHIs.
7747 F->getProperties().resetNoPHIs();
7748 return TailMBB;
7749}
7750
// Dispatch point for all pseudos marked usesCustomInserter: expands each
// pseudo into real machine instructions (or block structure) via the emit*
// helpers above, returning the block where subsequent instructions should be
// inserted.
7751 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7752 MachineInstr &MI, MachineBasicBlock *BB) const {
7753 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7754 DebugLoc DL = MI.getDebugLoc();
7755
7756 switch (MI.getOpcode()) {
7757 default:
7758 llvm_unreachable("Unexpected instr type to insert");
7759 case LoongArch::DIV_W:
7760 case LoongArch::DIV_WU:
7761 case LoongArch::MOD_W:
7762 case LoongArch::MOD_WU:
7763 case LoongArch::DIV_D:
7764 case LoongArch::DIV_DU:
7765 case LoongArch::MOD_D:
7766 case LoongArch::MOD_DU:
// Optionally guarded by -loongarch-check-zero-division (see ZeroDivCheck).
7767 return insertDivByZeroTrap(MI, BB);
7768 break;
7769 case LoongArch::WRFCSR: {
7770 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7771 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7772 .addReg(MI.getOperand(1).getReg());
7773 MI.eraseFromParent();
7774 return BB;
7775 }
7776 case LoongArch::RDFCSR: {
7777 MachineInstr *ReadFCSR =
7778 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7779 MI.getOperand(0).getReg())
7780 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
// The FCSR source register is not defined in the MIR; mark it undef so the
// verifier does not complain about reading an undefined physreg.
7781 ReadFCSR->getOperand(1).setIsUndef();
7782 MI.eraseFromParent();
7783 return BB;
7784 }
7785 case LoongArch::Select_GPR_Using_CC_GPR:
7786 return emitSelectPseudo(MI, BB, Subtarget);
7787 case LoongArch::BuildPairF64Pseudo:
7788 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7789 case LoongArch::SplitPairF64Pseudo:
7790 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7791 case LoongArch::PseudoVBZ:
7792 case LoongArch::PseudoVBZ_B:
7793 case LoongArch::PseudoVBZ_H:
7794 case LoongArch::PseudoVBZ_W:
7795 case LoongArch::PseudoVBZ_D:
7796 case LoongArch::PseudoVBNZ:
7797 case LoongArch::PseudoVBNZ_B:
7798 case LoongArch::PseudoVBNZ_H:
7799 case LoongArch::PseudoVBNZ_W:
7800 case LoongArch::PseudoVBNZ_D:
7801 case LoongArch::PseudoXVBZ:
7802 case LoongArch::PseudoXVBZ_B:
7803 case LoongArch::PseudoXVBZ_H:
7804 case LoongArch::PseudoXVBZ_W:
7805 case LoongArch::PseudoXVBZ_D:
7806 case LoongArch::PseudoXVBNZ:
7807 case LoongArch::PseudoXVBNZ_B:
7808 case LoongArch::PseudoXVBNZ_H:
7809 case LoongArch::PseudoXVBNZ_W:
7810 case LoongArch::PseudoXVBNZ_D:
7811 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7812 case LoongArch::PseudoXVINSGR2VR_B:
7813 case LoongArch::PseudoXVINSGR2VR_H:
7814 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7815 case LoongArch::PseudoCTPOP:
7816 return emitPseudoCTPOP(MI, BB, Subtarget);
7817 case LoongArch::PseudoVMSKLTZ_B:
7818 case LoongArch::PseudoVMSKLTZ_H:
7819 case LoongArch::PseudoVMSKLTZ_W:
7820 case LoongArch::PseudoVMSKLTZ_D:
7821 case LoongArch::PseudoVMSKGEZ_B:
7822 case LoongArch::PseudoVMSKEQZ_B:
7823 case LoongArch::PseudoVMSKNEZ_B:
7824 case LoongArch::PseudoXVMSKLTZ_B:
7825 case LoongArch::PseudoXVMSKLTZ_H:
7826 case LoongArch::PseudoXVMSKLTZ_W:
7827 case LoongArch::PseudoXVMSKLTZ_D:
7828 case LoongArch::PseudoXVMSKGEZ_B:
7829 case LoongArch::PseudoXVMSKEQZ_B:
7830 case LoongArch::PseudoXVMSKNEZ_B:
7831 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7832 case TargetOpcode::STATEPOINT:
7833 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7834 // while bl call instruction (where statepoint will be lowered at the
7835 // end) has implicit def. This def is early-clobber as it will be set at
7836 // the moment of the call and earlier than any use is read.
7837 // Add this implicit dead def here as a workaround.
// NOTE(review): original line 7839 (presumably `MachineOperand::CreateReg(`)
// is missing from this extraction.
7838 MI.addOperand(*MI.getMF(),
7840 LoongArch::R1, /*isDef*/ true,
7841 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7842 /*isUndef*/ false, /*isEarlyClobber*/ true));
7843 if (!Subtarget.is64Bit())
7844 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7845 return emitPatchPoint(MI, BB);
7846 }
7847}
7848
// Reports whether misaligned memory accesses are legal: allowed for any
// VT/address space/alignment when the subtarget has the UAL (unaligned
// access) feature; the speed hint is a placeholder value of 1.
// NOTE(review): original line 7849 (the
// `bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(` signature
// opener) is missing from this extraction.
7850 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7851 unsigned *Fast) const {
7852 if (!Subtarget.hasUAL())
7853 return false;
7854
7855 // TODO: set reasonable speed number.
7856 if (Fast)
7857 *Fast = 1;
7858 return true;
7859}
7860
7861//===----------------------------------------------------------------------===//
7862// Calling Convention Implementation
7863//===----------------------------------------------------------------------===//
7864
7865 // Eight general-purpose registers a0-a7 used for passing integer arguments,
7866 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
7867 // fixed-point arguments, and floating-point arguments when no FPR is available
7868 // or with soft float ABI.
// R4-R11 correspond to the ABI names a0-a7; allocation order matters.
7869 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7870 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7871 LoongArch::R10, LoongArch::R11};
7872
7873 // PreserveNone calling convention:
7874 // Arguments may be passed in any general-purpose registers except:
7875 // - R1 : return address register
7876 // - R22 : frame pointer
7877 // - R31 : base pointer
7878 //
7879 // All general-purpose registers are treated as caller-saved,
7880 // except R1 (RA) and R22 (FP).
7881 //
7882 // Non-volatile registers are allocated first so that a function
7883 // can call normal functions without having to spill and reload
7884 // argument registers.
// NOTE(review): original line 7885 (presumably the
// `const MCPhysReg PreserveNoneArgGPRs[] = {` declaration opener referenced
// by allocateArgGPR below) is missing from this extraction.
7886 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7887 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7888 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7889 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7890 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7891 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7892 LoongArch::R20};
7893
7894 // Eight floating-point registers fa0-fa7 used for passing floating-point
7895 // arguments, and fa0-fa1 are also used to return values.
7896 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7897 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7898 LoongArch::F6, LoongArch::F7};
7899 // FPR32 and FPR64 alias each other.
// NOTE(review): original line 7900 (presumably the
// `const MCPhysReg ArgFPR64s[] = {` declaration opener referenced by
// CC_LoongArch below) is missing from this extraction.
7901 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7902 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7903
// LSX (128-bit) and LASX (256-bit) vector argument registers; VRn and XRn
// views of the same physical registers, vr0-vr7 / xr0-xr7.
7904 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7905 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7906 LoongArch::VR6, LoongArch::VR7};
7907
7908 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7909 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7910 LoongArch::XR6, LoongArch::XR7};
7911
// Allocates the next argument GPR from the register list appropriate for the
// calling convention: the PreserveNone list for non-vararg preserve_none
// calls, the standard a0-a7 list otherwise.
// NOTE(review): original line 7912 (the function signature, presumably
// `static MCRegister allocateArgGPR(CCState &State) {`) and line 7914
// (presumably `case CallingConv::PreserveNone:` guarding the block below)
// are missing from this extraction.
7913 switch (State.getCallingConv()) {
7915 if (!State.isVarArg())
7916 return State.AllocateReg(PreserveNoneArgGPRs);
// Vararg preserve_none calls fall back to the normal GPR argument list.
7917 [[fallthrough]];
7918 default:
7919 return State.AllocateReg(ArgGPRs);
7920 }
7921}
7922
7923 // Pass a 2*GRLen argument that has been split into two GRLen values through
7924 // registers or the stack as necessary.
// Returns false (success) in all paths; location records for both halves are
// appended to State. The second half may land in a register, or on the stack
// without extra alignment, independently of the first.
7925 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7926 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7927 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7928 ISD::ArgFlagsTy ArgFlags2) {
7929 unsigned GRLenInBytes = GRLen / 8;
7930 if (Register Reg = allocateArgGPR(State)) {
7931 // At least one half can be passed via register.
7932 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7933 VA1.getLocVT(), CCValAssign::Full));
7934 } else {
7935 // Both halves must be passed on the stack, with proper alignment.
7936 Align StackAlign =
7937 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
// NOTE(review): original line 7939 (presumably the
// `CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),` call opener) is
// missing from this extraction.
7938 State.addLoc(
7940 State.AllocateStack(GRLenInBytes, StackAlign),
7941 VA1.getLocVT(), CCValAssign::Full));
7942 State.addLoc(CCValAssign::getMem(
7943 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7944 LocVT2, CCValAssign::Full));
7945 return false;
7946 }
7947 if (Register Reg = allocateArgGPR(State)) {
7948 // The second half can also be passed via register.
7949 State.addLoc(
7950 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7951 } else {
7952 // The second half is passed via the stack, without additional alignment.
7953 State.addLoc(CCValAssign::getMem(
7954 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7955 LocVT2, CCValAssign::Full));
7956 }
7957 return false;
7958}
7959
7960 // Implements the LoongArch calling convention. Returns true upon failure.
// Assigns one value (argument or return) to a register or stack slot,
// recording the decision in State. Handles FP-in-GPR fallback (soft-float
// ABIs or FPR exhaustion), the LA32 f64-split case, vararg 2*GRLen register
// alignment, and split scalar-integer arguments passed indirectly.
// NOTE(review): original line 7961 (the signature opener, presumably
// `static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,`)
// is missing from this extraction.
7962 unsigned ValNo, MVT ValVT,
7963 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7964 CCState &State, bool IsRet, Type *OrigTy) {
7965 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7966 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
7967 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7968 MVT LocVT = ValVT;
7969
7970 // Any return value split into more than two values can't be returned
7971 // directly.
7972 if (IsRet && ValNo > 1)
7973 return true;
7974
7975 // If passing a variadic argument, or if no FPR is available.
7976 bool UseGPRForFloat = true;
7977
// NOTE(review): the extraction dropped the ABI case labels between the
// `switch` arms below (original lines 7982-7985 and 7988-7989, presumably
// the ILP32F/ILP32D/LP64F/LP64D vs. soft-float ABI cases) - confirm which
// ABIs take each branch against upstream source.
7978 switch (ABI) {
7979 default:
7980 llvm_unreachable("Unexpected ABI");
7981 break;
7986 UseGPRForFloat = ArgFlags.isVarArg();
7987 break;
7990 break;
7991 }
7992
7993 // If this is a variadic argument, the LoongArch calling convention requires
7994 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7995 // byte alignment. An aligned register should be used regardless of whether
7996 // the original argument was split during legalisation or not. The argument
7997 // will not be passed by registers if the original type is larger than
7998 // 2*GRLen, so the register alignment rule does not apply.
7999 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8000 if (ArgFlags.isVarArg() &&
8001 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8002 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8003 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8004 // Skip 'odd' register if necessary.
8005 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8006 State.AllocateReg(ArgGPRs);
8007 }
8008
8009 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8010 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8011 State.getPendingArgFlags();
8012
8013 assert(PendingLocs.size() == PendingArgFlags.size() &&
8014 "PendingLocs and PendingArgFlags out of sync");
8015
8016 // FPR32 and FPR64 alias each other.
8017 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8018 UseGPRForFloat = true;
8019
8020 if (UseGPRForFloat && ValVT == MVT::f32) {
8021 LocVT = GRLenVT;
8022 LocInfo = CCValAssign::BCvt;
8023 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8024 LocVT = MVT::i64;
8025 LocInfo = CCValAssign::BCvt;
8026 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8027 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8028 // registers are exhausted.
8029 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8030 // Depending on available argument GPRS, f64 may be passed in a pair of
8031 // GPRs, split between a GPR and the stack, or passed completely on the
8032 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8033 // cases.
8034 MCRegister Reg = allocateArgGPR(State);
8035 if (!Reg) {
8036 int64_t StackOffset = State.AllocateStack(8, Align(8));
8037 State.addLoc(
8038 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8039 return false;
8040 }
// Custom locations mark the two i32 halves so the lowering code knows to
// reassemble them (see unpackF64OnLA32DSoftABI).
8041 LocVT = MVT::i32;
8042 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8043 MCRegister HiReg = allocateArgGPR(State);
8044 if (HiReg) {
8045 State.addLoc(
8046 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
8047 } else {
8048 int64_t StackOffset = State.AllocateStack(4, Align(4));
8049 State.addLoc(
8050 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8051 }
8052 return false;
8053 }
8054
8055 // Split arguments might be passed indirectly, so keep track of the pending
8056 // values.
8057 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8058 LocVT = GRLenVT;
8059 LocInfo = CCValAssign::Indirect;
8060 PendingLocs.push_back(
8061 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
8062 PendingArgFlags.push_back(ArgFlags);
8063 if (!ArgFlags.isSplitEnd()) {
8064 return false;
8065 }
8066 }
8067
8068 // If the split argument only had two elements, it should be passed directly
8069 // in registers or on the stack.
8070 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8071 PendingLocs.size() <= 2) {
8072 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8073 // Apply the normal calling convention rules to the first half of the
8074 // split argument.
8075 CCValAssign VA = PendingLocs[0];
8076 ISD::ArgFlagsTy AF = PendingArgFlags[0];
8077 PendingLocs.clear();
8078 PendingArgFlags.clear();
8079 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
8080 ArgFlags);
8081 }
8082
8083 // Allocate to a register if possible, or else a stack slot.
8084 Register Reg;
8085 unsigned StoreSizeBytes = GRLen / 8;
8086 Align StackAlign = Align(GRLen / 8);
8087
8088 if (ValVT == MVT::f32 && !UseGPRForFloat) {
8089 Reg = State.AllocateReg(ArgFPR32s);
8090 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8091 Reg = State.AllocateReg(ArgFPR64s);
8092 } else if (ValVT.is128BitVector()) {
8093 Reg = State.AllocateReg(ArgVRs);
8094 UseGPRForFloat = false;
8095 StoreSizeBytes = 16;
8096 StackAlign = Align(16);
8097 } else if (ValVT.is256BitVector()) {
8098 Reg = State.AllocateReg(ArgXRs);
8099 UseGPRForFloat = false;
8100 StoreSizeBytes = 32;
8101 StackAlign = Align(32);
8102 } else {
8103 Reg = allocateArgGPR(State);
8104 }
8105
8106 unsigned StackOffset =
8107 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8108
8109 // If we reach this point and PendingLocs is non-empty, we must be at the
8110 // end of a split argument that must be passed indirectly.
8111 if (!PendingLocs.empty()) {
8112 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8113 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8114 for (auto &It : PendingLocs) {
8115 if (Reg)
8116 It.convertToReg(Reg);
8117 else
8118 It.convertToMem(StackOffset);
8119 State.addLoc(It);
8120 }
8121 PendingLocs.clear();
8122 PendingArgFlags.clear();
8123 return false;
8124 }
8125 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8126 "Expected an GRLenVT at this stage");
8127
8128 if (Reg) {
8129 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8130 return false;
8131 }
8132
8133 // When a floating-point value is passed on the stack, no bit-cast is needed.
8134 if (ValVT.isFloatingPoint()) {
8135 LocVT = ValVT;
8136 LocInfo = CCValAssign::Full;
8137 }
8138
8139 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8140 return false;
8141}
8142
// Runs the given calling-convention assignment function over every incoming
// value (formal arguments, or return values when IsRet). The original IR type
// is recovered from the function type where available so alignment rules that
// depend on it apply correctly. An unhandled type is a fatal internal error.
8143 void LoongArchTargetLowering::analyzeInputArgs(
8144 MachineFunction &MF, CCState &CCInfo,
8145 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8146 LoongArchCCAssignFn Fn) const {
8147 FunctionType *FType = MF.getFunction().getFunctionType();
8148 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8149 MVT ArgVT = Ins[i].VT;
8150 Type *ArgTy = nullptr;
8151 if (IsRet)
8152 ArgTy = FType->getReturnType();
8153 else if (Ins[i].isOrigArg())
8154 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
// NOTE(review): original line 8155 (presumably `LoongArchABI::ABI ABI =`)
// is missing from this extraction.
8156 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8157 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8158 CCInfo, IsRet, ArgTy)) {
8159 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8160 << '\n');
8161 llvm_unreachable("");
8162 }
8163 }
8164}
8165
// Runs the given calling-convention assignment function over every outgoing
// value (call arguments, or this function's return values when IsRet). The
// original IR type comes from the call lowering info when available. An
// unhandled type is a fatal internal error.
8166 void LoongArchTargetLowering::analyzeOutputArgs(
8167 MachineFunction &MF, CCState &CCInfo,
8168 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8169 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8170 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8171 MVT ArgVT = Outs[i].VT;
8172 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
// NOTE(review): original line 8173 (presumably `LoongArchABI::ABI ABI =`)
// is missing from this extraction.
8174 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8175 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8176 CCInfo, IsRet, OrigTy)) {
8177 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8178 << "\n");
8179 llvm_unreachable("");
8180 }
8181 }
8182}
8183
8184 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8185 // values.
// For BCvt of f32-in-i64 a target-specific move node is used instead of a
// plain BITCAST (the value lives in the low 32 bits of a 64-bit GPR).
// NOTE(review): original line 8186 (the signature opener, presumably
// `static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,`)
// is missing from this extraction.
8187 const CCValAssign &VA, const SDLoc &DL) {
8188 switch (VA.getLocInfo()) {
8189 default:
8190 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8191 case CCValAssign::Full:
8193 break;
8194 case CCValAssign::BCvt:
8195 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8196 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8197 else
8198 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8199 break;
8200 }
8201 return Val;
8202}
8203
// Materializes an incoming argument that was assigned to a physical register:
// creates a live-in virtual register, copies the value out, and converts from
// the location type to the value type. Integer arguments known to be sign
// extended from <=32 bits are recorded for the LoongArchOptWInstrs pass.
// NOTE(review): original line 8204 (the signature opener carrying the
// function name, SelectionDAG and Chain parameters) is missing from this
// extraction.
8205 const CCValAssign &VA, const SDLoc &DL,
8206 const ISD::InputArg &In,
8207 const LoongArchTargetLowering &TLI) {
// NOTE(review): original lines 8208-8209 (presumably the MachineFunction and
// MachineRegisterInfo declarations used below) are missing.
8210 EVT LocVT = VA.getLocVT();
8211 SDValue Val;
8212 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8213 Register VReg = RegInfo.createVirtualRegister(RC);
8214 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8215 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8216
8217 // If input is sign extended from 32 bits, note it for the OptW pass.
8218 if (In.isOrigArg()) {
8219 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8220 if (OrigArg->getType()->isIntegerTy()) {
8221 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8222 // An input zero extended from i31 can also be considered sign extended.
8223 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8224 (BitWidth < 32 && In.Flags.isZExt())) {
// NOTE(review): original lines 8225-8226 (presumably obtaining the
// LoongArchMachineFunctionInfo *LAFI used below) are missing.
8227 LAFI->addSExt32Register(VReg);
8228 }
8229 }
8230 }
8231
8232 return convertLocVTToValVT(DAG, Val, VA, DL);
8233}
8234
8235 // The caller is responsible for loading the full value if the argument is
8236 // passed with CCValAssign::Indirect.
// Loads an incoming argument from its assigned stack location via an
// immutable fixed frame object; Full/BCvt both load without extension.
// NOTE(review): original line 8237 (the signature opener carrying the
// function name, SelectionDAG and Chain parameters) is missing from this
// extraction.
8238 const CCValAssign &VA, const SDLoc &DL) {
// NOTE(review): original line 8239 (presumably the MachineFunction
// declaration used below) is missing.
8240 MachineFrameInfo &MFI = MF.getFrameInfo();
8241 EVT ValVT = VA.getValVT();
8242 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8243 /*IsImmutable=*/true);
8244 SDValue FIN = DAG.getFrameIndex(
8246
8247 ISD::LoadExtType ExtType;
8248 switch (VA.getLocInfo()) {
8249 default:
8250 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8251 case CCValAssign::Full:
8253 case CCValAssign::BCvt:
8254 ExtType = ISD::NON_EXTLOAD;
8255 break;
8256 }
8257 return DAG.getExtLoad(
8258 ExtType, DL, VA.getLocVT(), Chain, FIN,
8260}
8261
// Reassemble an f64 argument that the LA32 soft-float ABI split into two i32
// halves: the low half always arrives in a GPR; the high half arrives either
// in the next GPR or on the stack.
// NOTE(review): the opening `static SDValue unpackF64OnLA32DSoftABI(...)`
// line and the local MF/RegInfo declarations were elided in this rendering.
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
    // NOTE(review): the MachinePointerInfo operand was elided in this
    // rendering.
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Glue the two i32 halves back together into a single f64 value.
  return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
}
8293
// Convert a value from its value type (ValVT) into the type it occupies in
// its assigned location (LocVT) -- the inverse of convertLocVTToValVT.
// NOTE(review): the opening `static SDValue convertValVTToLocVT(SelectionDAG
// &DAG, SDValue Val,` line was elided in this rendering.
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // An f32 carried in a 64-bit GPR uses the dedicated move node rather
    // than a plain bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
8312
8313static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8314 CCValAssign::LocInfo LocInfo,
8315 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8316 CCState &State) {
8317 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8318 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8319 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8320 static const MCPhysReg GPRList[] = {
8321 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8322 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8323 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8324 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8325 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8326 return false;
8327 }
8328 }
8329
8330 if (LocVT == MVT::f32) {
8331 // Pass in STG registers: F1, F2, F3, F4
8332 // fs0,fs1,fs2,fs3
8333 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8334 LoongArch::F26, LoongArch::F27};
8335 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8336 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8337 return false;
8338 }
8339 }
8340
8341 if (LocVT == MVT::f64) {
8342 // Pass in STG registers: D1, D2, D3, D4
8343 // fs4,fs5,fs6,fs7
8344 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8345 LoongArch::F30_64, LoongArch::F31_64};
8346 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8347 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8348 return false;
8349 }
8350 }
8351
8352 report_fatal_error("No registers left in GHC calling convention");
8353 return true;
8354}
8355
// Transform physical registers into virtual registers.
// Lower the function's incoming formal arguments: assign each argument to a
// register or stack location per the calling convention, copy/load the
// values into the DAG, and (for varargs) spill the unused argument registers
// into the vararg save area.
// NOTE(review): this rendering dropped a handful of lines (the
// LowerFormalArguments signature line, some local declarations and a few
// call operands) -- marked below; verify against the original file.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // NOTE(review): additional supported case labels appear to be elided
    // here in this rendering.
    break;
  case CallingConv::GHC:
    // GHC lowering pins arguments to FP registers, so F and D are mandatory.
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      // NOTE(review): the report_fatal_error( call line was elided in this
      // rendering.
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  // NOTE(review): the `SmallVector<CCValAssign> ArgLocs;` declaration was
  // elided in this rendering.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    // NOTE(review): the GHC AnalyzeFormalArguments call was elided in this
    // rendering.
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on LA32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      // Consumes two CCValAssigns (lo and hi halves), hence the extra ++i.
      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
      // NOTE(review): the MachinePointerInfo operand was elided in this
      // rendering.
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
        // NOTE(review): the MachinePointerInfo operand was elided in this
        // rendering.
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
    // Record incoming byval arguments so tail-call eligibility and lowering
    // can later match them against outgoing byval arguments.
    if (Ins[InsIdx].Flags.isByVal())
      LoongArchFI->addIncomingByValArgs(ArgValue);
  }

  if (IsVarArg) {
    // NOTE(review): the `ArgRegs` declaration (an ArrayRef over the integer
    // argument registers) was elided in this rendering.
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
      // NOTE(review): the MachinePointerInfo operand was elided in this
      // rendering.
      // Detach the IR Value from the spill's memory operand.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
8498
// NOTE(review): the enclosing signature line was elided in this rendering;
// from the body this is presumably
// LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) -- a
// call already marked `tail` in IR may be emitted as a tail call. Verify
// against the original file.
  return CI->isTailCall();
}
8502
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call.
// NOTE(review): the opening signature line (taking an `SDNode *N`) was
// elided in this rendering.
                                                  SDValue &Chain) const {
  // The node must produce exactly one value with exactly one use.
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  // That single use must be a CopyToReg feeding the return sequence.
  SDNode *Copy = *N->user_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->users()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  // Report back the chain the caller should continue from.
  Chain = Copy->getOperand(0);
  return true;
}
8535
8536// Check whether the call is eligible for tail call optimization.
8537bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8538 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8539 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8540
8541 auto CalleeCC = CLI.CallConv;
8542 auto &Outs = CLI.Outs;
8543 auto &Caller = MF.getFunction();
8544 auto CallerCC = Caller.getCallingConv();
8545 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8546
8547 // If the stack arguments for this call do not fit into our own save area then
8548 // the call cannot be made tail.
8549 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8550 return false;
8551
8552 // Do not tail call opt if any parameters need to be passed indirectly.
8553 for (auto &VA : ArgLocs)
8554 if (VA.getLocInfo() == CCValAssign::Indirect)
8555 return false;
8556
8557 // Do not tail call opt if either caller or callee uses struct return
8558 // semantics.
8559 auto IsCallerStructRet = Caller.hasStructRetAttr();
8560 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8561 if (IsCallerStructRet != IsCalleeStructRet)
8562 return false;
8563
8564 // Do not tail call opt if caller's and callee's byval arguments do not match.
8565 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8566 if (!Outs[i].Flags.isByVal())
8567 continue;
8568 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8569 return false;
8570 if (LoongArchFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
8571 return false;
8572 }
8573
8574 // The callee has to preserve all registers the caller needs to preserve.
8575 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8576 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8577 if (CalleeCC != CallerCC) {
8578 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8579 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8580 return false;
8581 }
8582
8583 // If the callee takes no arguments then go on to check the results of the
8584 // call.
8585 const MachineRegisterInfo &MRI = MF.getRegInfo();
8586 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8587 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
8588 return false;
8589
8590 return true;
8591}
8592
// NOTE(review): the opening line was elided in this rendering; from the call
// sites below this is presumably `static Align getPrefTypeAlign(EVT VT,
// SelectionDAG &DAG)` -- returns the data layout's preferred alignment for
// the IR type corresponding to VT.
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
8597
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
// NOTE(review): this rendering dropped several lines (the LowerCall signature
// line, some local declarations and a few call operands) -- marked below;
// verify against the original file.
SDValue
    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  // NOTE(review): the `Outs` (`&CLI.Outs`) declaration was elided here.
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  // NOTE(review): the `Ins` (`&CLI.Ins`) declaration was elided here.
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  bool &IsTailCall = CLI.IsTailCall;

  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

  // Analyze the operands of the call, assigning locations to each operand.
  // NOTE(review): the `ArgLocs` declaration was elided in this rendering.
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();
    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
    SDValue Dst;

    if (IsTailCall) {
      // For tail calls the callee reuses the caller's own incoming byval
      // slot as the destination.
      SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++);
      // NOTE(review): a guard condition appears to be elided here in this
      // rendering.
        Dst = CallerArg;
    } else {
      int FI =
          MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
      Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    }
    if (Dst) {
      Chain =
          DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
                        /*IsVolatile=*/false,
                        /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
      // NOTE(review): the MachinePointerInfo operands were elided in this
      // rendering.
      ByValArgs.push_back(Dst);
    }
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // During a tail call, stores to the argument area must happen after all of
  // the function's incoming arguments have been loaded because they may alias.
  // This is done by folding in a TokenFactor from LowerFormalArguments, but
  // there's no point in doing so repeatedly so this tracks whether that's
  // happened yet.
  bool AfterFormalArgLoads = false;

  // Copy argument values to their designated locations.
  // NOTE(review): the `RegsToPass` declaration was elided in this rendering.
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on LA32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 =
          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
        // NOTE(review): the `SDValue Address =` line was elided here.
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(DAG.getStore(
            Chain, DL, Hi, Address,
        // NOTE(review): the MachinePointerInfo operand was elided here.
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      // NOTE(review): the `Parts` declaration was elided in this rendering.
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
      // NOTE(review): the MachinePointerInfo operand was elided here.
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        // NOTE(review): the `SDValue Address =` line was elided here.
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
        // NOTE(review): the MachinePointerInfo operand was elided here.
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal()) {
      if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
                          isa<ExternalSymbolSDNode>(ArgValue) ||
                          isa<FrameIndexSDNode>(ArgValue)))
        ArgValue = ByValArgs[j++];
    }

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      SDValue DstAddr;
      MachinePointerInfo DstInfo;
      int32_t Offset = VA.getLocMemOffset();

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);

      if (IsTailCall) {
        // Tail calls store into the caller's own argument area via fixed
        // frame objects.
        unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
        DstAddr = DAG.getFrameIndex(FI, PtrVT);
        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
        if (!AfterFormalArgLoads) {
          Chain = DAG.getStackArgumentTokenFactor(Chain);
          AfterFormalArgLoads = true;
        }
      } else {
        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
        DstInfo = MachinePointerInfo::getStack(MF, Offset);
      }

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL_SMALL.
  // NOTE(review): the `if (GlobalAddressSDNode *S = dyn_cast<...>(Callee))`
  // line was elided in this rendering.
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
    // NOTE(review): the two target-flag operand lines were elided here.
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
    // NOTE(review): the two target-flag operand lines were elided here.
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  // NOTE(review): the `Ops` declaration was elided in this rendering.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model");
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  if (IsTailCall) {
    // NOTE(review): one line was elided here in this rendering.
    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  // NOTE(review): the `RVLocs` declaration was elided in this rendering.
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // f64 returned in two i32 GPRs on LA32D soft-float: fetch the second
      // half from the next CCValAssign and rebuild the pair.
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
                             RetValue, RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
8933
// Return true iff every return value can be assigned a location by
// CC_LoongArch when analyzed as a return (i.e. no demotion to memory is
// required).
// NOTE(review): the opening `bool LoongArchTargetLowering::CanLowerReturn(`
// line and the `RVLocs` declaration were elided in this rendering.
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    // The CC function returns true when it fails to assign a location.
    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
                     Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}
8950
// Lower outgoing return values: assign each to its return register(s), emit
// the glued copy-to-reg sequence, and finish with a LoongArchISD::RET node.
// NOTE(review): the opening `SDValue LoongArchTargetLowering::LowerReturn(`
// line, the `&Outs` parameter line and the `RVLocs` declaration were elided
// in this rendering.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on LA32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      // The hi half consumes the next CCValAssign, hence the extra ++i.
      Register RegHi = RVLocs[++i].getLocReg();

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
9012
// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
// Note: The following prefixes are excluded:
// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
// as they can be represented using [x]vrepli.[whb]
// Returns {true, 13-bit vldi immediate} on success, {false, 0} otherwise.
// The imm[11:8] "mode" nibble (annotated on each branch below) selects which
// bytes/bits of the splat value are folded into imm[7:0].
// NOTE(review): the opening signature line was elided in this rendering.
    const APInt &SplatValue, const unsigned SplatBitSize) const {
  uint64_t RequiredImm = 0;
  uint64_t V = SplatValue.getZExtValue();
  // 16-bit splat of the form 0xXX00.
  if (SplatBitSize == 16 && !(V & 0x00FF)) {
    // 4'b0101
    RequiredImm = (0b10101 << 8) | (V >> 8);
    return {true, RequiredImm};
  } else if (SplatBitSize == 32) {
    // 4'b0001: only bits [15:8] set.
    if (!(V & 0xFFFF00FF)) {
      RequiredImm = (0b10001 << 8) | (V >> 8);
      return {true, RequiredImm};
    }
    // 4'b0010: only bits [23:16] set.
    if (!(V & 0xFF00FFFF)) {
      RequiredImm = (0b10010 << 8) | (V >> 16);
      return {true, RequiredImm};
    }
    // 4'b0011: only bits [31:24] set.
    if (!(V & 0x00FFFFFF)) {
      RequiredImm = (0b10011 << 8) | (V >> 24);
      return {true, RequiredImm};
    }
    // 4'b0110: bits [15:8] set with low byte all ones.
    if ((V & 0xFFFF00FF) == 0xFF) {
      RequiredImm = (0b10110 << 8) | (V >> 8);
      return {true, RequiredImm};
    }
    // 4'b0111: bits [23:16] set with low two bytes all ones.
    if ((V & 0xFF00FFFF) == 0xFFFF) {
      RequiredImm = (0b10111 << 8) | (V >> 16);
      return {true, RequiredImm};
    }
    // 4'b1010: f32-shaped pattern (restricted exponent, 6 mantissa bits).
    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
      RequiredImm =
          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
      return {true, RequiredImm};
    }
  } else if (SplatBitSize == 64) {
    // 4'b1011: f32-shaped pattern in the low 32 bits of a 64-bit lane.
    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
      RequiredImm =
          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
      return {true, RequiredImm};
    }
    // 4'b1100: f64-shaped pattern (restricted exponent, 6 mantissa bits).
    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
      RequiredImm =
          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
      return {true, RequiredImm};
    }
    // 4'b1001: each byte is 0x00 or 0xFF; encode one bit per byte.
    auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
      uint8_t res = 0;
      for (int i = 0; i < 8; ++i) {
        uint8_t byte = x & 0xFF;
        if (byte == 0 || byte == 0xFF)
          res |= ((byte & 1) << i);
        else
          return {false, 0};
        x >>= 8;
      }
      return {true, res};
    };
    auto [IsSame, Suffix] = sameBitsPreByte(V);
    if (IsSame) {
      RequiredImm = (0b11001 << 8) | Suffix;
      return {true, RequiredImm};
    }
  }
  return {false, RequiredImm};
}
9093
// NOTE(review): the opening line was elided in this rendering; from the call
// site in isFPImmLegal this is presumably
// `bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,` --
// true if the FP immediate can be produced by a single [x]vldi.
                                              EVT VT) const {
  // vldi is an LSX instruction.
  if (!Subtarget.hasExtLSX())
    return false;

  if (VT == MVT::f32) {
    // Mirrors the 32-bit FP-shaped vldi pattern checked above: restricted
    // exponent bits, low mantissa bits zero.
    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
    return (masked == 0x3e000000 || masked == 0x40000000);
  }

  if (VT == MVT::f64) {
    // Mirrors the 64-bit FP-shaped vldi pattern checked above.
    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
    return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
  }

  return false;
}
9111
9112bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9113 bool ForCodeSize) const {
9114 // TODO: Maybe need more checks here after vector extension is supported.
9115 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9116 return false;
9117 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9118 return false;
9119 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
9120}
9121
// NOTE(review): the signature line was elided in this rendering; an
// unconditionally-true TLI predicate hook (possibly isCheapToSpeculateCttz)
// -- verify against the original file.
  return true;
}
9125
// NOTE(review): the signature line was elided in this rendering; an
// unconditionally-true TLI predicate hook (possibly isCheapToSpeculateCtlz)
// -- verify against the original file.
  return true;
}
9129
9130bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9131 const Instruction *I) const {
9132 if (!Subtarget.is64Bit())
9133 return isa<LoadInst>(I) || isa<StoreInst>(I);
9134
9135 if (isa<LoadInst>(I))
9136 return true;
9137
9138 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9139 // require fences beacuse we can use amswap_db.[w/d].
9140 Type *Ty = I->getOperand(0)->getType();
9141 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9142 unsigned Size = Ty->getIntegerBitWidth();
9143 return (Size == 8 || Size == 16);
9144 }
9145
9146 return false;
9147}
9148
// NOTE(review): the opening `EVT LoongArchTargetLowering::getSetCCResultType(`
// line and the vector-case return statement were elided in this rendering.
                                                 LLVMContext &Context,
                                                 EVT VT) const {
  // Scalar comparisons produce a pointer-width (GRLen) result.
  if (!VT.isVector())
    return getPointerTy(DL);
  // NOTE(review): the vector-case return was elided here (likely
  // `return VT.changeVectorElementTypeToInteger();`) -- verify against the
  // original file.
}
9156
// NOTE(review): the signature line was elided in this rendering; from the
// body this is the hasAndNot(SDValue Y) hook -- whether forming (X & ~Y) is
// profitable for this operand.
  EVT VT = Y.getValueType();

  // Vector and-not is only reported for integer vectors when LSX is present.
  if (VT.isVector())
    return Subtarget.hasExtLSX() && VT.isInteger();

  // Scalar: profitable for integers unless Y is a constant -- presumably
  // because a constant operand is handled better by other folds; verify.
  return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
}
9165
// NOTE(review): the opening signature lines were elided in this rendering.
// The body fills `Infos` with memory-intrinsic descriptions, so this is
// presumably a getTgtMemIntrinsic-style hook taking
// (SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I, ...) -- verify
// against the original file.
                              MachineFunction &MF, unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    // Not a recognized memory intrinsic: report nothing.
    return;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
    // All masked 32-bit atomicrmw intrinsics touch an aligned i32 at the
    // pointer operand with no offset.
    IntrinsicInfo Info;
    // NOTE(review): the `Info.opc = ...` assignment was elided in this
    // rendering.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // NOTE(review): the `Info.flags = ...` assignment lines were elided in
    // this rendering.
    Infos.push_back(Info);
    return;
  // TODO: Add more Intrinsics later.
  }
  }
}
9190
9191// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
9192// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9193// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9194// regression, we need to implement it manually.
9197
9199 Op == AtomicRMWInst::And) &&
9200 "Unable to expand");
9201 unsigned MinWordSize = 4;
9202
9203 IRBuilder<> Builder(AI);
9204 LLVMContext &Ctx = Builder.getContext();
9205 const DataLayout &DL = AI->getDataLayout();
9206 Type *ValueType = AI->getType();
9207 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9208
9209 Value *Addr = AI->getPointerOperand();
9210 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9211 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9212
9213 Value *AlignedAddr = Builder.CreateIntrinsic(
9214 Intrinsic::ptrmask, {PtrTy, IntTy},
9215 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9216 "AlignedAddr");
9217
9218 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9219 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9220 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9221 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9222 Value *Mask = Builder.CreateShl(
9223 ConstantInt::get(WordType,
9224 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9225 ShiftAmt, "Mask");
9226 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9227 Value *ValOperand_Shifted =
9228 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9229 ShiftAmt, "ValOperand_Shifted");
9230 Value *NewOperand;
9231 if (Op == AtomicRMWInst::And)
9232 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9233 else
9234 NewOperand = ValOperand_Shifted;
9235
9236 AtomicRMWInst *NewAI =
9237 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9238 AI->getOrdering(), AI->getSyncScopeID());
9239
9240 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9241 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9242 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9243 AI->replaceAllUsesWith(FinalOldResult);
9244 AI->eraseFromParent();
9245}
9246
9249 const AtomicRMWInst *AI) const {
9250 // TODO: Add more AtomicRMWInst that needs to be extended.
9251
9252 // Since floating-point operation requires a non-trivial set of data
9253 // operations, use CmpXChg to expand.
9254 if (AI->isFloatingPointOperation() ||
9260
9261 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9264 AI->getOperation() == AtomicRMWInst::Sub)) {
9266 }
9267
9268 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9269 if (Subtarget.hasLAMCAS()) {
9270 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9274 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9276 }
9277
9278 if (Size == 8 || Size == 16)
9281}
9282
9283static Intrinsic::ID
9285 AtomicRMWInst::BinOp BinOp) {
9286 if (GRLen == 64) {
9287 switch (BinOp) {
9288 default:
9289 llvm_unreachable("Unexpected AtomicRMW BinOp");
9291 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9292 case AtomicRMWInst::Add:
9293 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9294 case AtomicRMWInst::Sub:
9295 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9297 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9299 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9301 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9302 case AtomicRMWInst::Max:
9303 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9304 case AtomicRMWInst::Min:
9305 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9306 // TODO: support other AtomicRMWInst.
9307 }
9308 }
9309
9310 if (GRLen == 32) {
9311 switch (BinOp) {
9312 default:
9313 llvm_unreachable("Unexpected AtomicRMW BinOp");
9315 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9316 case AtomicRMWInst::Add:
9317 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9318 case AtomicRMWInst::Sub:
9319 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9321 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9323 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9325 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9326 case AtomicRMWInst::Max:
9327 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9328 case AtomicRMWInst::Min:
9329 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9330 // TODO: support other AtomicRMWInst.
9331 }
9332 }
9333
9334 llvm_unreachable("Unexpected GRLen\n");
9335}
9336
9339 const AtomicCmpXchgInst *CI) const {
9340
9341 if (Subtarget.hasLAMCAS())
9343
9345 if (Size == 8 || Size == 16)
9348}
9349
9351 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9352 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9353 unsigned GRLen = Subtarget.getGRLen();
9354 AtomicOrdering FailOrd = CI->getFailureOrdering();
9355 Value *FailureOrdering =
9356 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9357 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9358 if (GRLen == 64) {
9359 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9360 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9361 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9362 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9363 }
9364 Type *Tys[] = {AlignedAddr->getType()};
9365 Value *Result = Builder.CreateIntrinsic(
9366 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9367 if (GRLen == 64)
9368 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9369 return Result;
9370}
9371
9373 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9374 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9375 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9376 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9377 // mask, as this produces better code than the LL/SC loop emitted by
9378 // int_loongarch_masked_atomicrmw_xchg.
9379 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9382 if (CVal->isZero())
9383 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9384 Builder.CreateNot(Mask, "Inv_Mask"),
9385 AI->getAlign(), Ord);
9386 if (CVal->isMinusOne())
9387 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9388 AI->getAlign(), Ord);
9389 }
9390
9391 unsigned GRLen = Subtarget.getGRLen();
9392 Value *Ordering =
9393 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9394 Type *Tys[] = {AlignedAddr->getType()};
9396 AI->getModule(),
9398
9399 if (GRLen == 64) {
9400 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9401 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9402 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9403 }
9404
9405 Value *Result;
9406
9407 // Must pass the shift amount needed to sign extend the loaded value prior
9408 // to performing a signed comparison for min/max. ShiftAmt is the number of
9409 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9410 // is the number of bits to left+right shift the value in order to
9411 // sign-extend.
9412 if (AI->getOperation() == AtomicRMWInst::Min ||
9414 const DataLayout &DL = AI->getDataLayout();
9415 unsigned ValWidth =
9416 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9417 Value *SextShamt =
9418 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9419 Result = Builder.CreateCall(LlwOpScwLoop,
9420 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9421 } else {
9422 Result =
9423 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9424 }
9425
9426 if (GRLen == 64)
9427 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9428 return Result;
9429}
9430
9432 const MachineFunction &MF, EVT VT) const {
9433 VT = VT.getScalarType();
9434
9435 if (!VT.isSimple())
9436 return false;
9437
9438 switch (VT.getSimpleVT().SimpleTy) {
9439 case MVT::f32:
9440 case MVT::f64:
9441 return true;
9442 default:
9443 break;
9444 }
9445
9446 return false;
9447}
9448
9450 const Constant *PersonalityFn) const {
9451 return LoongArch::R4;
9452}
9453
9455 const Constant *PersonalityFn) const {
9456 return LoongArch::R5;
9457}
9458
9459//===----------------------------------------------------------------------===//
9460// Target Optimization Hooks
9461//===----------------------------------------------------------------------===//
9462
9464 const LoongArchSubtarget &Subtarget) {
9465 // Feature FRECIPE instrucions relative accuracy is 2^-14.
9466 // IEEE float has 23 digits and double has 52 digits.
9467 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9468 return RefinementSteps;
9469}
9470
9472 SelectionDAG &DAG, int Enabled,
9473 int &RefinementSteps,
9474 bool &UseOneConstNR,
9475 bool Reciprocal) const {
9476 if (Subtarget.hasFrecipe()) {
9477 SDLoc DL(Operand);
9478 EVT VT = Operand.getValueType();
9479
9480 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9481 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9482 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9483 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9484 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9485
9486 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9487 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9488
9489 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9490 if (Reciprocal)
9491 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9492
9493 return Estimate;
9494 }
9495 }
9496
9497 return SDValue();
9498}
9499
9501 SelectionDAG &DAG,
9502 int Enabled,
9503 int &RefinementSteps) const {
9504 if (Subtarget.hasFrecipe()) {
9505 SDLoc DL(Operand);
9506 EVT VT = Operand.getValueType();
9507
9508 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9509 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9510 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9511 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9512 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9513
9514 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9515 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9516
9517 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9518 }
9519 }
9520
9521 return SDValue();
9522}
9523
9524//===----------------------------------------------------------------------===//
9525// LoongArch Inline Assembly Support
9526//===----------------------------------------------------------------------===//
9527
9529LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9530 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9531 //
9532 // 'f': A floating-point register (if available).
9533 // 'k': A memory operand whose address is formed by a base register and
9534 // (optionally scaled) index register.
9535 // 'l': A signed 16-bit constant.
9536 // 'm': A memory operand whose address is formed by a base register and
9537 // offset that is suitable for use in instructions with the same
9538 // addressing mode as st.w and ld.w.
9539 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9540 // instruction)
9541 // 'I': A signed 12-bit constant (for arithmetic instructions).
9542 // 'J': Integer zero.
9543 // 'K': An unsigned 12-bit constant (for logic instructions).
9544 // "ZB": An address that is held in a general-purpose register. The offset is
9545 // zero.
9546 // "ZC": A memory operand whose address is formed by a base register and
9547 // offset that is suitable for use in instructions with the same
9548 // addressing mode as ll.w and sc.w.
9549 if (Constraint.size() == 1) {
9550 switch (Constraint[0]) {
9551 default:
9552 break;
9553 case 'f':
9554 case 'q':
9555 return C_RegisterClass;
9556 case 'l':
9557 case 'I':
9558 case 'J':
9559 case 'K':
9560 return C_Immediate;
9561 case 'k':
9562 return C_Memory;
9563 }
9564 }
9565
9566 if (Constraint == "ZC" || Constraint == "ZB")
9567 return C_Memory;
9568
9569 // 'm' is handled here.
9570 return TargetLowering::getConstraintType(Constraint);
9571}
9572
9573InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9574 StringRef ConstraintCode) const {
9575 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9579 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9580}
9581
9582std::pair<unsigned, const TargetRegisterClass *>
9583LoongArchTargetLowering::getRegForInlineAsmConstraint(
9584 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9585 // First, see if this is a constraint that directly corresponds to a LoongArch
9586 // register class.
9587 if (Constraint.size() == 1) {
9588 switch (Constraint[0]) {
9589 case 'r':
9590 // TODO: Support fixed vectors up to GRLen?
9591 if (VT.isVector())
9592 break;
9593 return std::make_pair(0U, &LoongArch::GPRRegClass);
9594 case 'q':
9595 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9596 case 'f':
9597 if (Subtarget.hasBasicF() && VT == MVT::f32)
9598 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9599 if (Subtarget.hasBasicD() && VT == MVT::f64)
9600 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9601 if (Subtarget.hasExtLSX() &&
9602 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9603 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9604 if (Subtarget.hasExtLASX() &&
9605 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9606 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9607 break;
9608 default:
9609 break;
9610 }
9611 }
9612
9613 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9614 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9615 // constraints while the official register name is prefixed with a '$'. So we
9616 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9617 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9618 // case insensitive, so no need to convert the constraint to upper case here.
9619 //
9620 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9621 // decode the usage of register name aliases into their official names. And
9622 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9623 // official register names.
9624 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9625 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9626 bool IsFP = Constraint[2] == 'f';
9627 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9628 std::pair<unsigned, const TargetRegisterClass *> R;
9630 TRI, join_items("", Temp.first, Temp.second), VT);
9631 // Match those names to the widest floating point register type available.
9632 if (IsFP) {
9633 unsigned RegNo = R.first;
9634 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9635 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9636 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9637 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9638 }
9639 }
9640 }
9641 return R;
9642 }
9643
9644 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9645}
9646
9647void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9648 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9649 SelectionDAG &DAG) const {
9650 // Currently only support length 1 constraints.
9651 if (Constraint.size() == 1) {
9652 switch (Constraint[0]) {
9653 case 'l':
9654 // Validate & create a 16-bit signed immediate operand.
9655 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9656 uint64_t CVal = C->getSExtValue();
9657 if (isInt<16>(CVal))
9658 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9659 Subtarget.getGRLenVT()));
9660 }
9661 return;
9662 case 'I':
9663 // Validate & create a 12-bit signed immediate operand.
9664 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9665 uint64_t CVal = C->getSExtValue();
9666 if (isInt<12>(CVal))
9667 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9668 Subtarget.getGRLenVT()));
9669 }
9670 return;
9671 case 'J':
9672 // Validate & create an integer zero operand.
9673 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9674 if (C->getZExtValue() == 0)
9675 Ops.push_back(
9676 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9677 return;
9678 case 'K':
9679 // Validate & create a 12-bit unsigned immediate operand.
9680 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9681 uint64_t CVal = C->getZExtValue();
9682 if (isUInt<12>(CVal))
9683 Ops.push_back(
9684 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9685 }
9686 return;
9687 default:
9688 break;
9689 }
9690 }
9692}
9693
9694#define GET_REGISTER_MATCHER
9695#include "LoongArchGenAsmMatcher.inc"
9696
9699 const MachineFunction &MF) const {
9700 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9701 std::string NewRegName = Name.second.str();
9702 Register Reg = MatchRegisterAltName(NewRegName);
9703 if (!Reg)
9704 Reg = MatchRegisterName(NewRegName);
9705 if (!Reg)
9706 return Reg;
9707 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9708 if (!ReservedRegs.test(Reg))
9709 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9710 StringRef(RegName) + "\"."));
9711 return Reg;
9712}
9713
9715 EVT VT, SDValue C) const {
9716 // TODO: Support vectors.
9717 if (!VT.isScalarInteger())
9718 return false;
9719
9720 // Omit the optimization if the data size exceeds GRLen.
9721 if (VT.getSizeInBits() > Subtarget.getGRLen())
9722 return false;
9723
9724 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9725 const APInt &Imm = ConstNode->getAPIntValue();
9726 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9727 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9728 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9729 return true;
9730 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9731 if (ConstNode->hasOneUse() &&
9732 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9733 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9734 return true;
9735 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9736 // in which the immediate has two set bits. Or Break (MUL x, imm)
9737 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9738 // equals to (1 << s0) - (1 << s1).
9739 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9740 unsigned Shifts = Imm.countr_zero();
9741 // Reject immediates which can be composed via a single LUI.
9742 if (Shifts >= 12)
9743 return false;
9744 // Reject multiplications can be optimized to
9745 // (SLLI (ALSL x, x, 1/2/3/4), s).
9746 APInt ImmPop = Imm.ashr(Shifts);
9747 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9748 return false;
9749 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9750 // since it needs one more instruction than other 3 cases.
9751 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9752 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9753 (ImmSmall - Imm).isPowerOf2())
9754 return true;
9755 }
9756 }
9757
9758 return false;
9759}
9760
9762 const AddrMode &AM,
9763 Type *Ty, unsigned AS,
9764 Instruction *I) const {
9765 // LoongArch has four basic addressing modes:
9766 // 1. reg
9767 // 2. reg + 12-bit signed offset
9768 // 3. reg + 14-bit signed offset left-shifted by 2
9769 // 4. reg1 + reg2
9770 // TODO: Add more checks after support vector extension.
9771
9772 // No global is ever allowed as a base.
9773 if (AM.BaseGV)
9774 return false;
9775
9776 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9777 // with `UAL` feature.
9778 if (!isInt<12>(AM.BaseOffs) &&
9779 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9780 return false;
9781
9782 switch (AM.Scale) {
9783 case 0:
9784 // "r+i" or just "i", depending on HasBaseReg.
9785 break;
9786 case 1:
9787 // "r+r+i" is not allowed.
9788 if (AM.HasBaseReg && AM.BaseOffs)
9789 return false;
9790 // Otherwise we have "r+r" or "r+i".
9791 break;
9792 case 2:
9793 // "2*r+r" or "2*r+i" is not allowed.
9794 if (AM.HasBaseReg || AM.BaseOffs)
9795 return false;
9796 // Allow "2*r" as "r+r".
9797 break;
9798 default:
9799 return false;
9800 }
9801
9802 return true;
9803}
9804
9806 return isInt<12>(Imm);
9807}
9808
9810 return isInt<12>(Imm);
9811}
9812
9814 // Zexts are free if they can be combined with a load.
9815 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9816 // poorly with type legalization of compares preferring sext.
9817 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9818 EVT MemVT = LD->getMemoryVT();
9819 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9820 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9821 LD->getExtensionType() == ISD::ZEXTLOAD))
9822 return true;
9823 }
9824
9825 return TargetLowering::isZExtFree(Val, VT2);
9826}
9827
9829 EVT DstVT) const {
9830 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9831}
9832
9834 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9835}
9836
9838 // TODO: Support vectors.
9839 if (Y.getValueType().isVector())
9840 return false;
9841
9842 return !isa<ConstantSDNode>(Y);
9843}
9844
9846 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9847 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9848}
9849
9851 Type *Ty, bool IsSigned) const {
9852 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9853 return true;
9854
9855 return IsSigned;
9856}
9857
9859 // Return false to suppress the unnecessary extensions if the LibCall
9860 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9861 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9862 Type.getSizeInBits() < Subtarget.getGRLen()))
9863 return false;
9864 return true;
9865}
9866
// memcpy and other memory intrinsics typically try to use wider loads/stores
// when the source/destination is sufficiently aligned and the copy size is
// large enough. We therefore want to align such objects passed to memory
// intrinsics.
9871 unsigned &MinSize,
9872 Align &PrefAlign) const {
9873 if (!isa<MemIntrinsic>(CI))
9874 return false;
9875
9876 if (Subtarget.is64Bit()) {
9877 MinSize = 8;
9878 PrefAlign = Align(8);
9879 } else {
9880 MinSize = 4;
9881 PrefAlign = Align(4);
9882 }
9883
9884 return true;
9885}
9886
9895
9896bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9897 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9898 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9899 bool IsABIRegCopy = CC.has_value();
9900 EVT ValueVT = Val.getValueType();
9901
9902 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9903 PartVT == MVT::f32) {
9904 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9905 // nan, and cast to f32.
9906 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9907 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9908 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9909 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9910 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9911 Parts[0] = Val;
9912 return true;
9913 }
9914
9915 return false;
9916}
9917
9918SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9919 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9920 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9921 bool IsABIRegCopy = CC.has_value();
9922
9923 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9924 PartVT == MVT::f32) {
9925 SDValue Val = Parts[0];
9926
9927 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9928 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9929 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9930 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9931 return Val;
9932 }
9933
9934 return SDValue();
9935}
9936
9937MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9938 CallingConv::ID CC,
9939 EVT VT) const {
9940 // Use f32 to pass f16.
9941 if (VT == MVT::f16 && Subtarget.hasBasicF())
9942 return MVT::f32;
9943
9945}
9946
9947unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9948 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9949 // Use f32 to pass f16.
9950 if (VT == MVT::f16 && Subtarget.hasBasicF())
9951 return 1;
9952
9954}
9955
9957 SDValue Op, const APInt &OriginalDemandedBits,
9958 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9959 unsigned Depth) const {
9960 EVT VT = Op.getValueType();
9961 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9962 unsigned Opc = Op.getOpcode();
9963 switch (Opc) {
9964 default:
9965 break;
9966 case LoongArchISD::VMSKLTZ:
9967 case LoongArchISD::XVMSKLTZ: {
9968 SDValue Src = Op.getOperand(0);
9969 MVT SrcVT = Src.getSimpleValueType();
9970 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9971 unsigned NumElts = SrcVT.getVectorNumElements();
9972
9973 // If we don't need the sign bits at all just return zero.
9974 if (OriginalDemandedBits.countr_zero() >= NumElts)
9975 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9976
9977 // Only demand the vector elements of the sign bits we need.
9978 APInt KnownUndef, KnownZero;
9979 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9980 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9981 TLO, Depth + 1))
9982 return true;
9983
9984 Known.Zero = KnownZero.zext(BitWidth);
9985 Known.Zero.setHighBits(BitWidth - NumElts);
9986
9987 // [X]VMSKLTZ only uses the MSB from each vector element.
9988 KnownBits KnownSrc;
9989 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9990 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9991 Depth + 1))
9992 return true;
9993
9994 if (KnownSrc.One[SrcBits - 1])
9995 Known.One.setLowBits(NumElts);
9996 else if (KnownSrc.Zero[SrcBits - 1])
9997 Known.Zero.setLowBits(NumElts);
9998
9999 // Attempt to avoid multi-use ops if we don't need anything from it.
10001 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
10002 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
10003 return false;
10004 }
10005 }
10006
10008 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
10009}
10010
10012 unsigned Opc = VecOp.getOpcode();
10013
10014 // Assume target opcodes can't be scalarized.
10015 // TODO - do we have any exceptions?
10016 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
10017 return false;
10018
10019 // If the vector op is not supported, try to convert to scalar.
10020 EVT VecVT = VecOp.getValueType();
10022 return true;
10023
10024 // If the vector op is supported, but the scalar op is not, the transform may
10025 // not be worthwhile.
10026 EVT ScalarVT = VecVT.getScalarType();
10027 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
10028}
10029
10031 unsigned Index) const {
10033 return false;
10034
10035 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
10036 return Index == 0;
10037}
10038
10040 unsigned Index) const {
10041 EVT EltVT = VT.getScalarType();
10042
10043 // Extract a scalar FP value from index 0 of a vector is free.
10044 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
10045}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1499
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1406
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:461
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...