LLVM 23.0.0git
SystemZISelLowering.cpp
Go to the documentation of this file.
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39 // Temporarily let this be disabled by default until all known problems
40 // related to argument extensions are fixed.
// NOTE(review): the cl::opt declaration line itself (original line 41) is
// missing from this capture; only the option name, init value, and
// description are visible.  The option defaults to off (cl::init(false)).
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85 // Return a version of MachineOperand that can be safely used before the
86 // final use.
// NOTE(review): the function signature line (original line 87) is missing
// from this capture.  The body clears the kill flag on register operands so
// this earlier use does not end the register's live range.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
// SystemZTargetLowering constructor: registers the target's register
// classes, sets per-operation legalization actions, and configures
// target-specific lowering parameters.
// NOTE(review): this capture is an extraction; the constructor's opening
// signature line (original line 93) and many setOperationAction lines are
// missing (the embedded line numbering jumps).  Only visible statements are
// documented below.
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
// i32 lives in the high-word-capable class when the facility is present.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
// FP register classes depend on whether vector registers are available.
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v8f16, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
128 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
129 }
130
131 if (Subtarget.hasVector())
132 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
133 }
134
135 // Compute derived properties from the register classes
136 computeRegisterProperties(Subtarget.getRegisterInfo());
137
138 // Set up special registers.
139 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
140
141 // TODO: It may be better to default to latency-oriented scheduling, however
142 // LLVM's current latency-oriented scheduler can't handle physreg definitions
143 // such as SystemZ has with CC, so set this to the register-pressure
144 // scheduler, because it can.
146
149
151
152 // Instructions are strings of 2-byte aligned 2-byte values.
154 // For performance reasons we prefer 16-byte alignment.
156
157 // Handle operations that are handled in a similar way for all types.
158 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
159 I <= MVT::LAST_FP_VALUETYPE;
160 ++I) {
162 if (isTypeLegal(VT)) {
163 // Lower SET_CC into an IPM-based sequence.
167
168 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
170
171 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
174 }
175 }
176
177 // Expand jump table branches as address arithmetic followed by an
178 // indirect jump.
180
181 // Expand BRCOND into a BR_CC (see above).
183
184 // Handle integer types except i128.
185 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
186 I <= MVT::LAST_INTEGER_VALUETYPE;
187 ++I) {
189 if (isTypeLegal(VT) && VT != MVT::i128) {
191
192 // Expand individual DIV and REMs into DIVREMs.
199
200 // Support addition/subtraction with overflow.
203
204 // Support addition/subtraction with carry.
207
208 // Support carry in as value rather than glue.
211
212 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
213 // available, or if the operand is constant.
215
216 // Use POPCNT on z196 and above.
217 if (Subtarget.hasPopulationCount())
219 else
221
222 // No special instructions for these.
225
226 // Use *MUL_LOHI where possible instead of MULH*.
231
232 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
233 // unsigned on z10 (only z196 and above have native support for
234 // unsigned conversions).
241 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
242 auto OpAction =
243 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
244 setOperationAction(Op, VT, OpAction);
245 }
246 }
247 }
248
249 // Handle i128 if legal.
250 if (isTypeLegal(MVT::i128)) {
251 // No special instructions for these.
258
259 // We may be able to use VSLDB/VSLD/VSRD for these.
262
263 // No special instructions for these before z17.
264 if (!Subtarget.hasVectorEnhancements3()) {
274 } else {
275 // Even if we do have a legal 128-bit multiply, we do not
276 // want 64-bit multiply-high operations to use it.
279 }
280
281 // Support addition/subtraction with carry.
286
287 // Use VPOPCT and add up partial results.
289
290 // Additional instructions available with z17.
291 if (Subtarget.hasVectorEnhancements3()) {
292 setOperationAction(ISD::ABS, MVT::i128, Legal);
293
295 MVT::i128, Legal);
296 }
297 }
298
299 // These need custom handling in order to handle the f16 conversions.
308
309 // Type legalization will convert 8- and 16-bit atomic operations into
310 // forms that operate on i32s (but still keeping the original memory VT).
311 // Lower them into full i32 operations.
323
324 // Whether or not i128 is not a legal type, we need to custom lower
325 // the atomic operations in order to exploit SystemZ instructions.
330
331 // Mark sign/zero extending atomic loads as legal, which will make
332 // DAGCombiner fold extensions into atomic loads if possible.
334 {MVT::i8, MVT::i16, MVT::i32}, Legal);
336 {MVT::i8, MVT::i16}, Legal);
338 MVT::i8, Legal);
339
340 // We can use the CC result of compare-and-swap to implement
341 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
345
347
348 // Traps are legal, as we will convert them to "j .+2".
349 setOperationAction(ISD::TRAP, MVT::Other, Legal);
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
355
356 // On z17 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
371
372 // Expand 128 bit shifts without using a libcall.
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(MVT::i128)) {
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(MVT::i128))
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
390 for (MVT VT : MVT::integer_valuetypes()) {
394 }
395
396 // Handle the various types of symbolic address.
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
407
410
411 // Handle prefetches with PFD or PFDRL.
413
414 // Handle readcyclecounter with STCKF.
416
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Opcode, VT) == Legal)
421 setOperationAction(Opcode, VT, Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(VT, InnerVT, Expand);
428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
446 }
447 }
448
449 // Handle integer vector types.
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
461 }
462 if (Subtarget.hasVectorEnhancements3() &&
463 VT != MVT::v16i8 && VT != MVT::v8i16) {
468 }
473 if (Subtarget.hasVectorEnhancements1())
475 else
479
480 // Convert a GPR scalar to a vector by inserting it into element 0.
482
483 // Use a series of unpacks for extensions.
486
487 // Detect shifts/rotates by a scalar amount and convert them into
488 // V*_BY_SCALAR.
493
494 // Add ISD::VECREDUCE_ADD as custom in order to implement
495 // it with VZERO+VSUM
497
498 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
499 // and inverting the result as necessary.
501
503 Legal);
504 }
505 }
506
507 if (Subtarget.hasVector()) {
508 // There should be no need to check for float types other than v2f64
509 // since <2 x f32> isn't a legal type.
518
527 }
528
529 if (Subtarget.hasVectorEnhancements2()) {
538
547 }
548
549 // Handle floating-point types.
550 if (!useSoftFloat()) {
551 // Promote all f16 operations to float, with some exceptions below.
552 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
553 setOperationAction(Opc, MVT::f16, Promote);
555 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
557 setTruncStoreAction(VT, MVT::f16, Expand);
558 }
560 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
565 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
566 setOperationAction(Op, MVT::f16, Legal);
567 }
568
569 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
570 I <= MVT::LAST_FP_VALUETYPE;
571 ++I) {
573 if (isTypeLegal(VT) && VT != MVT::f16) {
574 // We can use FI for FRINT.
576
577 // We can use the extended form of FI for other rounding operations.
578 if (Subtarget.hasFPExtension()) {
585 }
586
587 // No special instructions for these.
593
594 // Special treatment.
596
597 // Handle constrained floating-point operations.
606 if (Subtarget.hasFPExtension()) {
613 }
614
615 // Extension from f16 needs libcall.
618 }
619 }
620
621 // Handle floating-point vector types.
622 if (Subtarget.hasVector()) {
623 // Scalar-to-vector conversion is just a subreg.
627
628 // Some insertions and extractions can be done directly but others
629 // need to go via integers.
636
637 // These operations have direct equivalents.
638 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
639 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
640 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
641 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
642 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
643 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
644 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
645 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
646 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
649 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
653
654 // Handle constrained floating-point operations.
668
673 if (Subtarget.hasVectorEnhancements1()) {
676 }
677 }
678
679 // The vector enhancements facility 1 has instructions for these.
680 if (Subtarget.hasVectorEnhancements1()) {
681 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
682 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
683 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
684 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
685 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
686 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
687 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
688 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
689 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
692 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
696
697 for (MVT Type : {MVT::f64, MVT::v2f64, MVT::f32, MVT::v4f32, MVT::f128}) {
704 }
705
706 // Handle constrained floating-point operations.
720 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
721 MVT::v4f32, MVT::v2f64 }) {
726 }
727 }
728
729 // We only have fused f128 multiply-addition on vector registers.
730 if (!Subtarget.hasVectorEnhancements1()) {
733 }
734
735 // We don't have a copysign instruction on vector registers.
736 if (Subtarget.hasVectorEnhancements1())
738
739 // Needed so that we don't try to implement f128 constant loads using
740 // a load-and-extend of a f80 constant (in cases where the constant
741 // would fit in an f80).
742 for (MVT VT : MVT::fp_valuetypes())
743 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
744
745 // We don't have extending load instruction on vector registers.
746 if (Subtarget.hasVectorEnhancements1()) {
747 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
748 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
749 }
750
751 // Floating-point truncation and stores need to be done separately.
752 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
753 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
754 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
755
756 // We have 64-bit FPR<->GPR moves, but need special handling for
757 // 32-bit forms.
758 if (!Subtarget.hasVector()) {
761 }
762
763 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
764 // structure, but VAEND is a no-op.
768
769 if (Subtarget.isTargetzOS()) {
770 // Handle address space casts between mixed sized pointers.
773 }
774
776
777 // Codes for which we want to perform some z-specific combinations.
781 ISD::LOAD,
794 ISD::SRL,
795 ISD::SRA,
796 ISD::MUL,
797 ISD::SDIV,
798 ISD::UDIV,
799 ISD::SREM,
800 ISD::UREM,
803
804 // Handle intrinsics.
807
808 // We're not using SJLJ for exception handling, but they're implemented
809 // solely to support use of __builtin_setjmp / __builtin_longjmp.
812
813 // We want to use MVC in preference to even a single load/store pair.
814 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
816
817 // The main memset sequence is a byte store followed by an MVC.
818 // Two STC or MV..I stores win over that, but the kind of fused stores
819 // generated by target-independent code don't when the byte value is
820 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
821 // than "STC;MVC". Handle the choice in target-specific code instead.
822 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
824
825 // Default to having -disable-strictnode-mutation on
826 IsStrictFPEnabled = true;
827}
828
// NOTE(review): the function header (original line 829) is missing from this
// capture; the body forwards to the subtarget's soft-float flag.
830 return Subtarget.hasSoftFloat();
831}
832
// Breaks a vector VT into the register type(s) used to pass it for calling
// convention CC.  fp16 vectors are passed in vector registers as v8f16.
// NOTE(review): the function name line and parts of the body (original lines
// 833, 840, 842) are missing from this capture.
834 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
835 unsigned &NumIntermediates, MVT &RegisterVT) const {
836 // Pass fp16 vectors in VR(s).
837 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16) {
838 IntermediateVT = RegisterVT = MVT::v8f16;
839 return NumIntermediates =
// NOTE(review): the expression computing NumIntermediates (original line
// 840) is missing from this capture.
841 }
// Everything else falls back to the generic TargetLowering breakdown.
843 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
844}
845
// Returns the register type used to pass VT for calling convention CC.
// NOTE(review): the signature lines (original lines 846-847) are missing
// from this capture.
848 EVT VT) const {
849 // 128-bit single-element vector types are passed like other vectors,
850 // not like their element type.
851 if (VT.isVector() && VT.getSizeInBits() == 128 &&
852 VT.getVectorNumElements() == 1)
853 return MVT::v16i8;
854 // Pass fp16 vectors in VR(s).
855 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
856 return MVT::v8f16;
// Everything else uses the generic TargetLowering choice.
857 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
858}
859
// Returns the number of registers used to pass VT for calling convention CC.
// NOTE(review): the signature start (original line 860) and the return for
// the fp16-vector case (original line 864) are missing from this capture.
861 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
862 // Pass fp16 vectors in VR(s).
863 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
865 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
866}
867
// Returns the type produced by SETCC: i32 for scalar compares.
// NOTE(review): the signature start (original line 868) and the return for
// the vector case (original line 872) are missing from this capture.
869 LLVMContext &, EVT VT) const {
870 if (!VT.isVector())
871 return MVT::i32;
873}
874
// Returns true when a fused multiply-add is preferable to separate multiply
// and add for the scalar element type of VT: f32/f64 always, f128 only with
// the vector-enhancements-1 facility; false under soft-float or for
// non-simple types.
// NOTE(review): the function name line (original line 875) is missing from
// this capture.
876 const MachineFunction &MF, EVT VT) const {
877 if (useSoftFloat())
878 return false;
879
880 VT = VT.getScalarType();
881
882 if (!VT.isSimple())
883 return false;
884
885 switch (VT.getSimpleVT().SimpleTy) {
886 case MVT::f32:
887 case MVT::f64:
888 return true;
889 case MVT::f128:
890 return Subtarget.hasVectorEnhancements1();
891 default:
892 break;
893 }
894
895 return false;
896}
897
898 // Return true if the constant can be generated with a vector instruction,
899 // such as VGM, VGMB or VREPI.
// NOTE(review): the function signature (original line 900) and a few body
// lines (922, 935, 948) are missing from this capture.  On success the
// chosen SystemZISD opcode and its operand values are recorded in Opcode
// and OpVals.
901 const SystemZSubtarget &Subtarget) {
902 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
903 if (!Subtarget.hasVector() ||
904 (isFP128 && !Subtarget.hasVectorEnhancements1()))
905 return false;
906
907 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
908 // preferred way of creating all-zero and all-one vectors so give it
909 // priority over other methods below.
910 unsigned Mask = 0;
911 unsigned I = 0;
// Each byte of the 128-bit value must be either 0x00 or 0xff for VGBM;
// the mask gets one bit per all-ones byte.
912 for (; I < SystemZ::VectorBytes; ++I) {
913 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
914 if (Byte == 0xff)
915 Mask |= 1ULL << I;
916 else if (Byte != 0)
917 break;
918 }
919 if (I == SystemZ::VectorBytes) {
920 Opcode = SystemZISD::BYTE_MASK;
921 OpVals.push_back(Mask);
923 return true;
924 }
925
926 if (SplatBitSize > 64)
927 return false;
928
929 auto TryValue = [&](uint64_t Value) -> bool {
930 // Try VECTOR REPLICATE IMMEDIATE
931 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
932 if (isInt<16>(SignedValue)) {
933 OpVals.push_back(((unsigned) SignedValue));
934 Opcode = SystemZISD::REPLICATE;
936 SystemZ::VectorBits / SplatBitSize);
937 return true;
938 }
939 // Try VECTOR GENERATE MASK
940 unsigned Start, End;
941 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
942 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
943 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
944 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
945 OpVals.push_back(Start - (64 - SplatBitSize));
946 OpVals.push_back(End - (64 - SplatBitSize));
947 Opcode = SystemZISD::ROTATE_MASK;
949 SystemZ::VectorBits / SplatBitSize);
950 return true;
951 }
952 return false;
953 };
954
955 // First try assuming that any undefined bits above the highest set bit
956 // and below the lowest set bit are 1s. This increases the likelihood of
957 // being able to use a sign-extended element value in VECTOR REPLICATE
958 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
959 uint64_t SplatBitsZ = SplatBits.getZExtValue();
960 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
961 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
962 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
963 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
964 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
965 if (TryValue(SplatBitsZ | Upper | Lower))
966 return true;
967
968 // Now try assuming that any undefined bits between the first and
969 // last defined set bits are set. This increases the chances of
970 // using a non-wraparound mask.
971 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
972 return TryValue(SplatBitsZ | Middle);
973}
974
// SystemZVectorConstantInfo constructor taking an APInt: normalizes the
// immediate to a 128-bit IntBits value and derives the smallest repeating
// splat (SplatBits/SplatBitSize).
// NOTE(review): the constructor's signature line (original line 975) is
// missing from this capture.
976 if (IntImm.isSingleWord()) {
977 IntBits = APInt(128, IntImm.getZExtValue());
// Left-justify a narrower immediate within the 128-bit vector value.
978 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
979 } else
980 IntBits = IntImm;
981 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
982
983 // Find the smallest splat.
984 SplatBits = IntImm;
985 unsigned Width = SplatBits.getBitWidth();
// Repeatedly halve the value while both halves are identical, stopping
// at a minimum element width of 8 bits.
986 while (Width > 8) {
987 unsigned HalfSize = Width / 2;
988 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
989 APInt LowValue = SplatBits.trunc(HalfSize);
990
991 // If the two halves do not match, stop here.
992 if (HighValue != LowValue || 8 > HalfSize)
993 break;
994
995 SplatBits = HighValue;
996 Width = HalfSize;
997 }
// A constant immediate has no undefined bits.
998 SplatUndef = 0;
999 SplatBitSize = Width;
1000}
1001
// SystemZVectorConstantInfo constructor taking a constant BUILD_VECTOR
// node: extracts both the full 128-bit splat (IntBits) and the smallest
// 8-bit-or-wider splat (SplatBits/SplatUndef/SplatBitSize).
// NOTE(review): the constructor's signature line (original line 1002) is
// missing from this capture.
1003 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
1004 bool HasAnyUndefs;
1005
1006 // Get IntBits by finding the 128 bit splat.
1007 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
1008 true);
1009
1010 // Get SplatBits by finding the 8 bit or greater splat.
1011 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
1012 true);
1013}
1014
// Returns true if the FP immediate can be materialized without a load:
// (negative) zero directly, anything else if it is a legal vector constant.
// NOTE(review): the function name line (original line 1015) is missing from
// this capture.
1016 bool ForCodeSize) const {
1017 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
1018 if (Imm.isZero() || Imm.isNegZero())
1019 return true;
1020
1021 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
1022}
1023
// Expands the EH_SjLj_SetJmp pseudo: stores the frame pointer (if any),
// the restore-label address, the backchain value (with -mbackchain) and the
// stack pointer into the setjmp buffer, then builds the main/restore/sink
// block diamond that yields 0 on the direct path and 1 on the longjmp path.
// NOTE(review): the method's signature lines (original lines 1024-1025) and
// two statements (1090, 1095) are missing from this capture.
1026 MachineBasicBlock *MBB) const {
1027 DebugLoc DL = MI.getDebugLoc();
1028 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1029 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1030
1031 MachineFunction *MF = MBB->getParent();
1032 MachineRegisterInfo &MRI = MF->getRegInfo();
1033
1034 const BasicBlock *BB = MBB->getBasicBlock();
1035 MachineFunction::iterator I = ++MBB->getIterator();
1036
1037 Register DstReg = MI.getOperand(0).getReg();
1038 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1039 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1040 (void)TRI;
1041 Register MainDstReg = MRI.createVirtualRegister(RC);
1042 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1043
1044 MVT PVT = getPointerTy(MF->getDataLayout());
1045 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1046 // For v = setjmp(buf), we generate.
1047 // Algorithm:
1048 //
1049 // ---------
1050 // | thisMBB |
1051 // ---------
1052 // |
1053 // ------------------------
1054 // | |
1055 // ---------- ---------------
1056 // | mainMBB | | restoreMBB |
1057 // | v = 0 | | v = 1 |
1058 // ---------- ---------------
1059 // | |
1060 // -------------------------
1061 // |
1062 // -----------------------------
1063 // | sinkMBB |
1064 // | phi(v_mainMBB,v_restoreMBB) |
1065 // -----------------------------
1066 // thisMBB:
1067 // buf[FPOffset] = Frame Pointer if hasFP.
1068 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1069 // buf[BCOffset] = Backchain value if building with -mbackchain.
1070 // buf[SPOffset] = Stack Pointer.
1071 // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always.
1072 // SjLjSetup restoreMBB
1073 // mainMBB:
1074 // v_main = 0
1075 // sinkMBB:
1076 // v = phi(v_main, v_restore)
1077 // restoreMBB:
1078 // v_restore = 1
1079
1080 MachineBasicBlock *ThisMBB = MBB;
1081 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1082 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1083 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1084
1085 MF->insert(I, MainMBB);
1086 MF->insert(I, SinkMBB);
// RestoreMBB goes at the end of the function; its address is taken by LARL.
1087 MF->push_back(RestoreMBB);
1088 RestoreMBB->setMachineBlockAddressTaken();
1089
1091
1092 // Transfer the remainder of BB and its successor edges to sinkMBB.
1093 SinkMBB->splice(SinkMBB->begin(), MBB,
1094 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1096
1097 // thisMBB:
1098 const int64_t FPOffset = 0; // Slot 1.
1099 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1100 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1101 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1102
1103 // Buf address.
1104 Register BufReg = MI.getOperand(1).getReg();
1105
1106 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1107 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1108
1109 // Prepare IP for longjmp.
1110 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1111 .addMBB(RestoreMBB);
1112 // Store IP for return from jmp, slot 2, offset = 1.
1113 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1114 .addReg(LabelReg)
1115 .addReg(BufReg)
1116 .addImm(LabelOffset)
1117 .addReg(0);
1118
1119 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1120 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1121 if (HasFP) {
1122 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1123 .addReg(SpecialRegs->getFramePointerRegister())
1124 .addReg(BufReg)
1125 .addImm(FPOffset)
1126 .addReg(0);
1127 }
1128
1129 // Store SP.
1130 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1131 .addReg(SpecialRegs->getStackPointerRegister())
1132 .addReg(BufReg)
1133 .addImm(SPOffset)
1134 .addReg(0);
1135
1136 // Slot 3(Offset = 2) Backchain value (if building with -mbackchain).
1137 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1138 if (BackChain) {
1139 Register BCReg = MRI.createVirtualRegister(PtrRC);
1140 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1141 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1142 .addReg(SpecialRegs->getStackPointerRegister())
1143 .addImm(TFL->getBackchainOffset(*MF))
1144 .addReg(0);
1145
1146 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1147 .addReg(BCReg)
1148 .addReg(BufReg)
1149 .addImm(BCOffset)
1150 .addReg(0);
1151 }
1152
1153 // Setup.
1154 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1155 .addMBB(RestoreMBB);
1156
// The setup pseudo clobbers everything a longjmp might not preserve.
1157 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1158 MIB.addRegMask(RegInfo->getNoPreservedMask());
1159
1160 ThisMBB->addSuccessor(MainMBB);
1161 ThisMBB->addSuccessor(RestoreMBB);
1162
1163 // mainMBB:
1164 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1165 MainMBB->addSuccessor(SinkMBB);
1166
1167 // sinkMBB:
1168 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1169 .addReg(MainDstReg)
1170 .addMBB(MainMBB)
1171 .addReg(RestoreDstReg)
1172 .addMBB(RestoreMBB);
1173
1174 // restoreMBB.
1175 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1176 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1177 RestoreMBB->addSuccessor(SinkMBB);
1178
1179 MI.eraseFromParent();
1180
1181 return SinkMBB;
1182}
1183
// Expands the EH_SjLj_LongJmp pseudo: reloads the return IP, frame pointer,
// R13, (optionally) the backchain value, and the stack pointer from the
// setjmp buffer, then branches to the reloaded IP.
// NOTE(review): the method's signature lines (original lines 1184-1185) and
// one statement (1203) are missing from this capture.
1186 MachineBasicBlock *MBB) const {
1187
1188 DebugLoc DL = MI.getDebugLoc();
1189 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1190
1191 MachineFunction *MF = MBB->getParent();
1192 MachineRegisterInfo &MRI = MF->getRegInfo();
1193
1194 MVT PVT = getPointerTy(MF->getDataLayout());
1195 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1196 Register BufReg = MI.getOperand(0).getReg();
1197 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1198 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1199
1200 Register Tmp = MRI.createVirtualRegister(RC);
1201 Register BCReg = MRI.createVirtualRegister(RC);
1202
1204
// Buffer slot layout; must match the offsets used by emitEHSjLjSetJmp.
1205 const int64_t FPOffset = 0;
1206 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1207 const int64_t BCOffset = 2 * PVT.getStoreSize();
1208 const int64_t SPOffset = 3 * PVT.getStoreSize();
1209 const int64_t LPOffset = 4 * PVT.getStoreSize();
1210
1211 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1212 .addReg(BufReg)
1213 .addImm(LabelOffset)
1214 .addReg(0);
1215
1216 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1217 SpecialRegs->getFramePointerRegister())
1218 .addReg(BufReg)
1219 .addImm(FPOffset)
1220 .addReg(0);
1221
1222 // We are restoring R13 even though we never stored in setjmp from llvm,
1223 // as gcc always stores R13 in builtin_setjmp. We could have mixed code
1224 // gcc setjmp and llvm longjmp.
1225 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1226 .addReg(BufReg)
1227 .addImm(LPOffset)
1228 .addReg(0);
1229
1230 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1231 if (BackChain) {
1232 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1233 .addReg(BufReg)
1234 .addImm(BCOffset)
1235 .addReg(0);
1236 }
1237
1238 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1239 SpecialRegs->getStackPointerRegister())
1240 .addReg(BufReg)
1241 .addImm(SPOffset)
1242 .addReg(0);
1243
// The backchain slot is rewritten only after SP has been restored, so the
// store lands in the reinstated frame.
1244 if (BackChain) {
1245 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1246 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1247 .addReg(BCReg)
1248 .addReg(SpecialRegs->getStackPointerRegister())
1249 .addImm(TFL->getBackchainOffset(*MF))
1250 .addReg(0);
1251 }
1252
1253 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1254
1255 MI.eraseFromParent();
1256 return MBB;
1257}
1258
1259/// Returns true if stack probing through inline assembly is requested.
// NOTE(review): the function signature line (original line 1260) is missing
// from this capture.
1261 // If the function specifically requests inline stack probes, emit them.
1262 if (MF.getFunction().hasFnAttribute("probe-stack"))
1263 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1264 "inline-asm";
1265 return false;
1266}
1267
1272
1277
// Decides how an atomicrmw instruction should be expanded.
// NOTE(review): the function name line (original line 1279) and several
// return/condition lines (1283, 1288-1293, 1295) are missing from this
// capture; only the visible tests are documented here.
1280 const AtomicRMWInst *RMW) const {
1281 // Don't expand subword operations as they require special treatment.
1282 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1284
1285 // Don't expand if there is a target instruction available.
1286 if (Subtarget.hasInterlockedAccess1() &&
1287 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1294
1296}
1297
// Returns true if Imm can be used directly in a compare: any value that
// fits a signed or unsigned 32-bit immediate.
// NOTE(review): the function signature line (original line 1298) is missing
// from this capture.
1299 // We can use CGFI or CLGFI.
1300 return isInt<32>(Imm) || isUInt<32>(Imm);
1301}
1302
// Returns true if Imm can be added directly: unsigned 32-bit values can be
// added (ALGFI) and their negations subtracted (SLGFI).
// NOTE(review): the function signature line (original line 1303) is missing
// from this capture.
1304 // We can use ALGFI or SLGFI.
1305 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1306}
1307
// Reports that misaligned accesses are always acceptable (and fast) on
// SystemZ for any VT.
// NOTE(review): the function name line (original line 1308) is missing from
// this capture.
1309 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1310 // Unaligned accesses should never be slower than the expanded version.
1311 // We check specifically for aligned accesses in the few cases where
1312 // they are required.
1313 if (Fast)
1314 *Fast = 1;
1315 return true;
1316}
1317
// Returns true if the target has a native and-with-complement for the type
// of Y: GPR types need miscellaneous-extensions-3 (NC(G)RK), vector/i128
// types need the vector facility (VNC).
// NOTE(review): the function signature line (original line 1318) is missing
// from this capture.
1319 EVT VT = Y.getValueType();
1320
1321 // We can use NC(G)RK for types in GPRs ...
1322 if (VT == MVT::i32 || VT == MVT::i64)
1323 return Subtarget.hasMiscellaneousExtensions3();
1324
1325 // ... or VNC for types in VRs.
1326 if (VT.isVector() || VT == MVT::i128)
1327 return Subtarget.hasVector();
1328
1329 return false;
1330}
1331
1332 // Information about the addressing mode for a memory access.
// NOTE(review): the struct declaration line (original line 1333) and the
// two member declaration lines (1335 LongDisplacement, 1338 IndexReg —
// names inferred from the constructor's init list below) are missing from
// this capture.
1334 // True if a long displacement is supported.
1336
1337 // True if use of index register is supported.
1339
1340 AddressingMode(bool LongDispl, bool IdxReg) :
1341 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1342};
1343
1344 // Return the desired addressing mode for a Load which has only one use (in
1345 // the same block) which is a Store.
// NOTE(review): the function signature line (original line 1346) is missing
// from this capture; Ty is presumably the accessed memory type.
1347 Type *Ty) {
1348 // With vector support a Load->Store combination may be combined to either
1349 // an MVC or vector operations and it seems to work best to allow the
1350 // vector addressing mode.
1351 if (HasVector)
1352 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1353
1354 // Otherwise only the MVC case is special.
// MVC handles byte copies; it has neither long displacements nor an index.
1355 bool MVC = Ty->isIntegerTy(8);
1356 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1357}
1358
1359// Return the addressing mode which seems most desirable given an LLVM
1360// Instruction pointer.
1361static AddressingMode
1364 switch (II->getIntrinsicID()) {
1365 default: break;
1366 case Intrinsic::memset:
1367 case Intrinsic::memmove:
1368 case Intrinsic::memcpy:
1369 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1370 }
1371 }
1372
1373 if (isa<LoadInst>(I) && I->hasOneUse()) {
1374 auto *SingleUser = cast<Instruction>(*I->user_begin());
1375 if (SingleUser->getParent() == I->getParent()) {
1376 if (isa<ICmpInst>(SingleUser)) {
1377 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1378 if (C->getBitWidth() <= 64 &&
1379 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1380 // Comparison of memory with 16 bit signed / unsigned immediate
1381 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1382 } else if (isa<StoreInst>(SingleUser))
1383 // Load->Store
1384 return getLoadStoreAddrMode(HasVector, I->getType());
1385 }
1386 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1387 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1388 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1389 // Load->Store
1390 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1391 }
1392
1393 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1394
1395 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1396 // dependencies (LDE only supports small offsets).
1397 // * Utilize the vector registers to hold floating point
1398 // values (vector load / store instructions only support small
1399 // offsets).
1400
1401 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1402 I->getOperand(0)->getType());
1403 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1404 bool IsVectorAccess = MemAccessTy->isVectorTy();
1405
1406 // A store of an extracted vector element will be combined into a VSTE type
1407 // instruction.
1408 if (!IsVectorAccess && isa<StoreInst>(I)) {
1409 Value *DataOp = I->getOperand(0);
1410 if (isa<ExtractElementInst>(DataOp))
1411 IsVectorAccess = true;
1412 }
1413
1414 // A load which gets inserted into a vector element will be combined into a
1415 // VLE type instruction.
1416 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1417 User *LoadUser = *I->user_begin();
1418 if (isa<InsertElementInst>(LoadUser))
1419 IsVectorAccess = true;
1420 }
1421
1422 if (IsFPAccess || IsVectorAccess)
1423 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1424 }
1425
1426 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1427}
1428
1430 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1431 // Punt on globals for now, although they can be used in limited
1432 // RELATIVE LONG cases.
1433 if (AM.BaseGV)
1434 return false;
1435
1436 // Require a 20-bit signed offset.
1437 if (!isInt<20>(AM.BaseOffs))
1438 return false;
1439
1440 bool RequireD12 =
1441 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1442 AddressingMode SupportedAM(!RequireD12, true);
1443 if (I != nullptr)
1444 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1445
1446 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1447 return false;
1448
1449 if (!SupportedAM.IndexReg)
1450 // No indexing allowed.
1451 return AM.Scale == 0;
1452 else
1453 // Indexing is OK but no scale factor can be applied.
1454 return AM.Scale == 0 || AM.Scale == 1;
1455}
1456
1458 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1459 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1460 const AttributeList &FuncAttributes, EVT *LargestVT) const {
1461 const int MVCFastLen = 16;
1462
1463 if (Limit != ~unsigned(0)) {
1464 // Don't expand Op into scalar loads/stores in these cases:
1465 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1466 return false; // Small memcpy: Use MVC
1467 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1468 return false; // Small memset (first byte with STC/MVI): Use MVC
1469 if (Op.isZeroMemset())
1470 return false; // Memset zero: Use XC
1471 }
1472
1474 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
1475}
1476
1478 LLVMContext &Context, const MemOp &Op,
1479 const AttributeList &FuncAttributes) const {
1480 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1481}
1482
1483bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1484 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1485 return false;
1486 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1487 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1488 return FromBits > ToBits;
1489}
1490
1492 if (!FromVT.isInteger() || !ToVT.isInteger())
1493 return false;
1494 unsigned FromBits = FromVT.getFixedSizeInBits();
1495 unsigned ToBits = ToVT.getFixedSizeInBits();
1496 return FromBits > ToBits;
1497}
1498
1499//===----------------------------------------------------------------------===//
1500// Inline asm support
1501//===----------------------------------------------------------------------===//
1502
1505 if (Constraint.size() == 1) {
1506 switch (Constraint[0]) {
1507 case 'a': // Address register
1508 case 'd': // Data register (equivalent to 'r')
1509 case 'f': // Floating-point register
1510 case 'h': // High-part register
1511 case 'r': // General-purpose register
1512 case 'v': // Vector register
1513 return C_RegisterClass;
1514
1515 case 'Q': // Memory with base and unsigned 12-bit displacement
1516 case 'R': // Likewise, plus an index
1517 case 'S': // Memory with base and signed 20-bit displacement
1518 case 'T': // Likewise, plus an index
1519 case 'm': // Equivalent to 'T'.
1520 return C_Memory;
1521
1522 case 'I': // Unsigned 8-bit constant
1523 case 'J': // Unsigned 12-bit constant
1524 case 'K': // Signed 16-bit constant
1525 case 'L': // Signed 20-bit displacement (on all targets we support)
1526 case 'M': // 0x7fffffff
1527 return C_Immediate;
1528
1529 default:
1530 break;
1531 }
1532 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1533 switch (Constraint[1]) {
1534 case 'Q': // Address with base and unsigned 12-bit displacement
1535 case 'R': // Likewise, plus an index
1536 case 'S': // Address with base and signed 20-bit displacement
1537 case 'T': // Likewise, plus an index
1538 return C_Address;
1539
1540 default:
1541 break;
1542 }
1543 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1544 if (StringRef("{@cc}").compare(Constraint) == 0)
1545 return C_Other;
1546 }
1547 return TargetLowering::getConstraintType(Constraint);
1548}
1549
1552 AsmOperandInfo &Info, const char *Constraint) const {
1554 Value *CallOperandVal = Info.CallOperandVal;
1555 // If we don't have a value, we can't do a match,
1556 // but allow it at the lowest weight.
1557 if (!CallOperandVal)
1558 return CW_Default;
1559 Type *type = CallOperandVal->getType();
1560 // Look at the constraint type.
1561 switch (*Constraint) {
1562 default:
1563 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1564 break;
1565
1566 case 'a': // Address register
1567 case 'd': // Data register (equivalent to 'r')
1568 case 'h': // High-part register
1569 case 'r': // General-purpose register
1570 Weight =
1571 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1572 break;
1573
1574 case 'f': // Floating-point register
1575 if (!useSoftFloat())
1576 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1577 break;
1578
1579 case 'v': // Vector register
1580 if (Subtarget.hasVector())
1581 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1582 : CW_Default;
1583 break;
1584
1585 case 'I': // Unsigned 8-bit constant
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (isUInt<8>(C->getZExtValue()))
1588 Weight = CW_Constant;
1589 break;
1590
1591 case 'J': // Unsigned 12-bit constant
1592 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1593 if (isUInt<12>(C->getZExtValue()))
1594 Weight = CW_Constant;
1595 break;
1596
1597 case 'K': // Signed 16-bit constant
1598 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1599 if (isInt<16>(C->getSExtValue()))
1600 Weight = CW_Constant;
1601 break;
1602
1603 case 'L': // Signed 20-bit displacement (on all targets we support)
1604 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1605 if (isInt<20>(C->getSExtValue()))
1606 Weight = CW_Constant;
1607 break;
1608
1609 case 'M': // 0x7fffffff
1610 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1611 if (C->getZExtValue() == 0x7fffffff)
1612 Weight = CW_Constant;
1613 break;
1614 }
1615 return Weight;
1616}
1617
1618// Parse a "{tNNN}" register constraint for which the register type "t"
1619// has already been verified. MC is the class associated with "t" and
1620// Map maps 0-based register numbers to LLVM register numbers.
1621static std::pair<unsigned, const TargetRegisterClass *>
1623 const unsigned *Map, unsigned Size) {
1624 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1625 if (isdigit(Constraint[2])) {
1626 unsigned Index;
1627 bool Failed =
1628 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1629 if (!Failed && Index < Size && Map[Index])
1630 return std::make_pair(Map[Index], RC);
1631 }
1632 return std::make_pair(0U, nullptr);
1633}
1634
1635std::pair<unsigned, const TargetRegisterClass *>
1637 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1638 if (Constraint.size() == 1) {
1639 // GCC Constraint Letters
1640 switch (Constraint[0]) {
1641 default: break;
1642 case 'd': // Data register (equivalent to 'r')
1643 case 'r': // General-purpose register
1644 if (VT.getSizeInBits() == 64)
1645 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1646 else if (VT.getSizeInBits() == 128)
1647 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1648 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1649
1650 case 'a': // Address register
1651 if (VT == MVT::i64)
1652 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1653 else if (VT == MVT::i128)
1654 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1655 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1656
1657 case 'h': // High-part register (an LLVM extension)
1658 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1659
1660 case 'f': // Floating-point register
1661 if (!useSoftFloat()) {
1662 if (VT.getSizeInBits() == 16)
1663 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1664 else if (VT.getSizeInBits() == 64)
1665 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1666 else if (VT.getSizeInBits() == 128)
1667 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1668 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1669 }
1670 break;
1671
1672 case 'v': // Vector register
1673 if (Subtarget.hasVector()) {
1674 if (VT.getSizeInBits() == 16)
1675 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1676 if (VT.getSizeInBits() == 32)
1677 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1678 if (VT.getSizeInBits() == 64)
1679 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1680 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1681 }
1682 break;
1683 }
1684 }
1685 if (Constraint.starts_with("{")) {
1686
1687 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1688 // to check the size on.
1689 auto getVTSizeInBits = [&VT]() {
1690 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1691 };
1692
1693 // We need to override the default register parsing for GPRs and FPRs
1694 // because the interpretation depends on VT. The internal names of
1695 // the registers are also different from the external names
1696 // (F0D and F0S instead of F0, etc.).
1697 if (Constraint[1] == 'r') {
1698 if (getVTSizeInBits() == 32)
1699 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1701 if (getVTSizeInBits() == 128)
1702 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1704 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1706 }
1707 if (Constraint[1] == 'f') {
1708 if (useSoftFloat())
1709 return std::make_pair(
1710 0u, static_cast<const TargetRegisterClass *>(nullptr));
1711 if (getVTSizeInBits() == 16)
1712 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1714 if (getVTSizeInBits() == 32)
1715 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1717 if (getVTSizeInBits() == 128)
1718 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1720 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1722 }
1723 if (Constraint[1] == 'v') {
1724 if (!Subtarget.hasVector())
1725 return std::make_pair(
1726 0u, static_cast<const TargetRegisterClass *>(nullptr));
1727 if (getVTSizeInBits() == 16)
1728 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1730 if (getVTSizeInBits() == 32)
1731 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1733 if (getVTSizeInBits() == 64)
1734 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1736 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1738 }
1739 if (Constraint[1] == '@') {
1740 if (StringRef("{@cc}").compare(Constraint) == 0)
1741 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1742 }
1743 }
1744 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1745}
1746
1747// FIXME? Maybe this could be a TableGen attribute on some registers and
1748// this table could be generated automatically from RegInfo.
1751 const MachineFunction &MF) const {
1752 Register Reg =
1754 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1755 : SystemZ::NoRegister)
1756 .Case("r15",
1757 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1758 .Default(Register());
1759
1760 return Reg;
1761}
1762
1764 const Constant *PersonalityFn) const {
1765 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1766}
1767
1769 const Constant *PersonalityFn) const {
1770 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1771}
1772
1773// Convert condition code in CCReg to an i32 value.
1775 SDLoc DL(CCReg);
1776 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1777 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1778 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1779}
1780
1781// Lower @cc targets via setcc.
1783 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1784 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1785 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1786 return SDValue();
1787
1788 // Check that return type is valid.
1789 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1790 OpInfo.ConstraintVT.getSizeInBits() < 8)
1791 report_fatal_error("Glue output operand is of invalid type");
1792
1793 if (Glue.getNode()) {
1794 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1795 Chain = Glue.getValue(1);
1796 } else
1797 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1798 return getCCResult(DAG, Glue);
1799}
1800
1802 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1803 SelectionDAG &DAG) const {
1804 // Only support length 1 constraints for now.
1805 if (Constraint.size() == 1) {
1806 switch (Constraint[0]) {
1807 case 'I': // Unsigned 8-bit constant
1808 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1809 if (isUInt<8>(C->getZExtValue()))
1810 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1811 Op.getValueType()));
1812 return;
1813
1814 case 'J': // Unsigned 12-bit constant
1815 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1816 if (isUInt<12>(C->getZExtValue()))
1817 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1818 Op.getValueType()));
1819 return;
1820
1821 case 'K': // Signed 16-bit constant
1822 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1823 if (isInt<16>(C->getSExtValue()))
1824 Ops.push_back(DAG.getSignedTargetConstant(
1825 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1826 return;
1827
1828 case 'L': // Signed 20-bit displacement (on all targets we support)
1829 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1830 if (isInt<20>(C->getSExtValue()))
1831 Ops.push_back(DAG.getSignedTargetConstant(
1832 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1833 return;
1834
1835 case 'M': // 0x7fffffff
1836 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1837 if (C->getZExtValue() == 0x7fffffff)
1838 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1839 Op.getValueType()));
1840 return;
1841 }
1842 }
1844}
1845
1846//===----------------------------------------------------------------------===//
1847// Calling conventions
1848//===----------------------------------------------------------------------===//
1849
1850#include "SystemZGenCallingConv.inc"
1851
1853 CallingConv::ID) const {
1854 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1855 SystemZ::R14D, 0 };
1856 return ScratchRegs;
1857}
1858
1860 Type *ToType) const {
1861 return isTruncateFree(FromType, ToType);
1862}
1863
1865 return CI->isTailCall();
1866}
1867
1868// Value is a value that has been passed to us in the location described by VA
1869// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1870// any loads onto Chain.
1872 CCValAssign &VA, SDValue Chain,
1873 SDValue Value) {
1874 // If the argument has been promoted from a smaller type, insert an
1875 // assertion to capture this.
1876 if (VA.getLocInfo() == CCValAssign::SExt)
1878 DAG.getValueType(VA.getValVT()));
1879 else if (VA.getLocInfo() == CCValAssign::ZExt)
1881 DAG.getValueType(VA.getValVT()));
1882
1883 if (VA.isExtInLoc())
1884 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1885 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1886 // If this is a short vector argument loaded from the stack,
1887 // extend from i64 to full vector size and then bitcast.
1888 assert(VA.getLocVT() == MVT::i64);
1889 assert(VA.getValVT().isVector());
1890 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1891 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1892 } else
1893 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1894 return Value;
1895}
1896
1897// Value is a value of type VA.getValVT() that we need to copy into
1898// the location described by VA. Return a copy of Value converted to
1899// VA.getValVT(). The caller is responsible for handling indirect values.
1901 CCValAssign &VA, SDValue Value) {
1902 switch (VA.getLocInfo()) {
1903 case CCValAssign::SExt:
1904 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1905 case CCValAssign::ZExt:
1906 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1907 case CCValAssign::AExt:
1908 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1909 case CCValAssign::BCvt: {
1910 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1911 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1912 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1913 // For an f32 vararg we need to first promote it to an f64 and then
1914 // bitcast it to an i64.
1915 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1916 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1917 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1918 ? MVT::v2i64
1919 : VA.getLocVT();
1920 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1921 // For ELF, this is a short vector argument to be stored to the stack,
1922 // bitcast to v2i64 and then extract first element.
1923 if (BitCastToType == MVT::v2i64)
1924 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1925 DAG.getConstant(0, DL, MVT::i32));
1926 return Value;
1927 }
1928 case CCValAssign::Full:
1929 return Value;
1930 default:
1931 llvm_unreachable("Unhandled getLocInfo()");
1932 }
1933}
1934
1936 SDLoc DL(In);
1937 SDValue Lo, Hi;
1938 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1939 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1940 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1941 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1942 DAG.getConstant(64, DL, MVT::i32)));
1943 } else {
1944 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1945 }
1946
1947 // FIXME: If v2i64 were a legal type, we could use it instead of
1948 // Untyped here. This might enable improved folding.
1949 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1950 MVT::Untyped, Hi, Lo);
1951 return SDValue(Pair, 0);
1952}
1953
1955 SDLoc DL(In);
1956 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1957 DL, MVT::i64, In);
1958 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1959 DL, MVT::i64, In);
1960
1961 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1962 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1963 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1964 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1965 DAG.getConstant(64, DL, MVT::i32));
1966 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1967 } else {
1968 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1969 }
1970}
1971
1973 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1974 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1975 EVT ValueVT = Val.getValueType();
1976 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1977 // Inline assembly operand.
1978 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1979 return true;
1980 }
1981
1982 return false;
1983}
1984
1986 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1987 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1988 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1989 // Inline assembly operand.
1990 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1991 return DAG.getBitcast(ValueVT, Res);
1992 }
1993
1994 return SDValue();
1995}
1996
1997// The first part of a split stack argument is at index I in Args (and
1998// ArgLocs). Return the type of a part and the number of them by reference.
1999template <class ArgTy>
2001 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
2002 MVT &PartVT, unsigned &NumParts) {
2003 if (!Args[I].Flags.isSplit())
2004 return false;
2005 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
2006 "ArgLocs havoc.");
2007 PartVT = ArgLocs[I].getValVT();
2008 NumParts = 1;
2009 for (unsigned PartIdx = I + 1;; ++PartIdx) {
2010 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
2011 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
2012 ++NumParts;
2013 if (Args[PartIdx].Flags.isSplitEnd())
2014 break;
2015 }
2016 return true;
2017}
2018
2020 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2021 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2022 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2024 MachineFrameInfo &MFI = MF.getFrameInfo();
2025 MachineRegisterInfo &MRI = MF.getRegInfo();
2026 SystemZMachineFunctionInfo *FuncInfo =
2028 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2029 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2030
2031 // Assign locations to all of the incoming arguments.
2033 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2034 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2035 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2036
2037 unsigned NumFixedGPRs = 0;
2038 unsigned NumFixedFPRs = 0;
2039 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2040 SDValue ArgValue;
2041 CCValAssign &VA = ArgLocs[I];
2042 EVT LocVT = VA.getLocVT();
2043 if (VA.isRegLoc()) {
2044 // Arguments passed in registers
2045 const TargetRegisterClass *RC;
2046 switch (LocVT.getSimpleVT().SimpleTy) {
2047 default:
2048 // Integers smaller than i64 should be promoted to i64.
2049 llvm_unreachable("Unexpected argument type");
2050 case MVT::i32:
2051 NumFixedGPRs += 1;
2052 RC = &SystemZ::GR32BitRegClass;
2053 break;
2054 case MVT::i64:
2055 NumFixedGPRs += 1;
2056 RC = &SystemZ::GR64BitRegClass;
2057 break;
2058 case MVT::f16:
2059 NumFixedFPRs += 1;
2060 RC = &SystemZ::FP16BitRegClass;
2061 break;
2062 case MVT::f32:
2063 NumFixedFPRs += 1;
2064 RC = &SystemZ::FP32BitRegClass;
2065 break;
2066 case MVT::f64:
2067 NumFixedFPRs += 1;
2068 RC = &SystemZ::FP64BitRegClass;
2069 break;
2070 case MVT::f128:
2071 NumFixedFPRs += 2;
2072 RC = &SystemZ::FP128BitRegClass;
2073 break;
2074 case MVT::v16i8:
2075 case MVT::v8i16:
2076 case MVT::v4i32:
2077 case MVT::v2i64:
2078 case MVT::v8f16:
2079 case MVT::v4f32:
2080 case MVT::v2f64:
2081 RC = &SystemZ::VR128BitRegClass;
2082 break;
2083 }
2084
2085 Register VReg = MRI.createVirtualRegister(RC);
2086 MRI.addLiveIn(VA.getLocReg(), VReg);
2087 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2088 } else {
2089 assert(VA.isMemLoc() && "Argument not register or memory");
2090
2091 // Create the frame index object for this incoming parameter.
2092 // FIXME: Pre-include call frame size in the offset, should not
2093 // need to manually add it here.
2094 int64_t ArgSPOffset = VA.getLocMemOffset();
2095 if (Subtarget.isTargetXPLINK64()) {
2096 auto &XPRegs =
2097 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2098 ArgSPOffset += XPRegs.getCallFrameSize();
2099 }
2100 int FI =
2101 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2102
2103 // Create the SelectionDAG nodes corresponding to a load
2104 // from this parameter. Unpromoted ints and floats are
2105 // passed as right-justified 8-byte values.
2106 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2107 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2108 VA.getLocVT() == MVT::f16) {
2109 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2110 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2111 DAG.getIntPtrConstant(SlotOffs, DL));
2112 }
2113 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2115 }
2116
2117 // Convert the value of the argument register into the value that's
2118 // being passed.
2119 if (VA.getLocInfo() == CCValAssign::Indirect) {
2120 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2122 // If the original argument was split (e.g. i128), we need
2123 // to load all parts of it here (using the same address).
2124 MVT PartVT;
2125 unsigned NumParts;
2126 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2127 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2128 ++I;
2129 CCValAssign &PartVA = ArgLocs[I];
2130 unsigned PartOffset = Ins[I].PartOffset;
2131 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2132 DAG.getIntPtrConstant(PartOffset, DL));
2133 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2135 assert(PartOffset && "Offset should be non-zero.");
2136 }
2137 }
2138 } else
2139 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2140 }
2141
2142 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2143 // Save the number of non-varargs registers for later use by va_start, etc.
2144 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2145 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2146
2147 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2148 Subtarget.getSpecialRegisters());
2149
2150 // Likewise the address (in the form of a frame index) of where the
2151 // first stack vararg would be. The 1-byte size here is arbitrary.
2152 // FIXME: Pre-include call frame size in the offset, should not
2153 // need to manually add it here.
2154 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2155 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2156 FuncInfo->setVarArgsFrameIndex(FI);
2157 }
2158
2159 if (IsVarArg && Subtarget.isTargetELF()) {
2160 // Save the number of non-varargs registers for later use by va_start, etc.
2161 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2162 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2163
2164 // Likewise the address (in the form of a frame index) of where the
2165 // first stack vararg would be. The 1-byte size here is arbitrary.
2166 int64_t VarArgsOffset = CCInfo.getStackSize();
2167 FuncInfo->setVarArgsFrameIndex(
2168 MFI.CreateFixedObject(1, VarArgsOffset, true));
2169
2170 // ...and a similar frame index for the caller-allocated save area
2171 // that will be used to store the incoming registers.
2172 int64_t RegSaveOffset =
2173 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2174 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2175 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2176
2177 // Store the FPR varargs in the reserved frame slots. (We store the
2178 // GPRs as part of the prologue.)
2179 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2181 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2182 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2183 int FI =
2185 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2187 &SystemZ::FP64BitRegClass);
2188 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2189 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2191 }
2192 // Join the stores, which are independent of one another.
2193 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2194 ArrayRef(&MemOps[NumFixedFPRs],
2195 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2196 }
2197 }
2198
2199 if (Subtarget.isTargetXPLINK64()) {
2200 // Create virual register for handling incoming "ADA" special register (R5)
2201 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2202 Register ADAvReg = MRI.createVirtualRegister(RC);
2203 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2204 Subtarget.getSpecialRegisters());
2205 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2206 FuncInfo->setADAVirtualRegister(ADAvReg);
2207 }
2208 return Chain;
2209}
2210
2211static bool canUseSiblingCall(const CCState &ArgCCInfo,
2214 // Punt if there are any indirect or stack arguments, or if the call
2215 // needs the callee-saved argument register R6, or if the call uses
2216 // the callee-saved register arguments SwiftSelf and SwiftError.
2217 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2218 CCValAssign &VA = ArgLocs[I];
2220 return false;
2221 if (!VA.isRegLoc())
2222 return false;
2223 Register Reg = VA.getLocReg();
2224 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2225 return false;
2226 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2227 return false;
2228 }
2229 return true;
2230}
2231
2233 unsigned Offset, bool LoadAdr = false) {
2236 Register ADAvReg = MFI->getADAVirtualRegister();
2238
2239 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2240 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2241
2242 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2243 if (!LoadAdr)
2244 Result = DAG.getLoad(
2245 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2247
2248 return Result;
2249}
2250
2251// ADA access using Global value
2252// Note: for functions, address of descriptor is returned
// Classifies GV (function vs. data, internal vs. external linkage), wraps it
// in a target global address with the matching ADA operand flag, and resolves
// it through the offset-form getADAEntry above.
// NOTE(review): the ADAtype assignments inside the if/else ladder were lost
// in extraction; confirm the MO_ADA_* flags against upstream.
2254 EVT PtrVT) {
2255 unsigned ADAtype;
2256 bool LoadAddr = false;
 // A function may be reached through an alias; look through it.
2257 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2258 bool IsFunction =
2259 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2260 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2261
2262 if (IsFunction) {
2263 if (IsInternal) {
2265 LoadAddr = true;
2266 } else
2268 } else {
2270 }
2271 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2272
2273 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2274}
2275
// For a z/OS (XPLINK64) call, compute the ADA value and rewrite Callee to
// the real entry point.  Returns true when the callee has internal linkage
// and can be called directly (the caller names this result IsBRASL); returns
// false when the call must go through a function descriptor, whose ADA and
// EPA words live at offsets ADADelta and EPADelta respectively.
2276static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2277 SDLoc &DL, SDValue &Chain) {
2278 unsigned ADADelta = 0; // ADA offset in desc.
2279 unsigned EPADelta = 8; // EPA offset in desc.
2282
2283 // XPLink calling convention.
2284 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2285 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2286 G->getGlobal()->hasPrivateLinkage());
2287 if (IsInternal) {
 // Internal functions share the caller's ADA: reuse the incoming ADA
 // virtual register and call the symbol pc-relative.
2290 Register ADAvReg = MFI->getADAVirtualRegister();
2291 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2292 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2293 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2294 return true;
2295 } else {
 // External function: load ADA and entry point out of the descriptor.
2297 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2298 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2299 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2300 }
2301 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2303 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2304 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2305 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2306 } else {
2307 // Function pointer case
 // The pointer addresses a descriptor directly: load the ADA and EPA
 // words from fixed offsets.
2308 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2309 DAG.getConstant(ADADelta, DL, PtrVT));
2310 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2312 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2313 DAG.getConstant(EPADelta, DL, PtrVT));
2314 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2316 }
2317 return false;
2318}
2319
// Lower an outgoing call, covering both the ELF ABI and the z/OS XPLINK64
// ABI.  Handles sibling-call detection, indirect (spilled) arguments,
// register/stack argument placement, the ADA register for XPLINK64, and
// copying the results back out of their physical registers.
// NOTE(review): several declaration lines (MF, ArgLocs, RegsToPass, Ops,
// RetLocs) were lost in this extracted copy although the identifiers are
// still referenced below; confirm against upstream.
2320SDValue
2322 SmallVectorImpl<SDValue> &InVals) const {
2323 SelectionDAG &DAG = CLI.DAG;
2324 SDLoc &DL = CLI.DL;
2326 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2328 SDValue Chain = CLI.Chain;
2329 SDValue Callee = CLI.Callee;
2330 bool &IsTailCall = CLI.IsTailCall;
2331 CallingConv::ID CallConv = CLI.CallConv;
2332 bool IsVarArg = CLI.IsVarArg;
2334 EVT PtrVT = getPointerTy(MF.getDataLayout());
2335 LLVMContext &Ctx = *DAG.getContext();
2336 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2337
2338 // FIXME: z/OS support to be added in later.
2339 if (Subtarget.isTargetXPLINK64())
2340 IsTailCall = false;
2341
2342 // Integer args <=32 bits should have an extension attribute.
2343 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2344
2345 // Analyze the operands of the call, assigning locations to each operand.
2347 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2348 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2349
2350 // We don't support GuaranteedTailCallOpt, only automatically-detected
2351 // sibling calls.
2352 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2353 IsTailCall = false;
2354
2355 // Get a count of how many bytes are to be pushed on the stack.
2356 unsigned NumBytes = ArgCCInfo.getStackSize();
2357
2358 // Mark the start of the call.
2359 if (!IsTailCall)
2360 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2361
2362 // Copy argument values to their designated locations.
2364 SmallVector<SDValue, 8> MemOpChains;
2365 SDValue StackPtr;
2366 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2367 CCValAssign &VA = ArgLocs[I];
2368 SDValue ArgValue = OutVals[I];
2369
2370 if (VA.getLocInfo() == CCValAssign::Indirect) {
2371 // Store the argument in a stack slot and pass its address.
 // If the argument was split into parts, the slot must be big enough
 // for all of them, so derive the slot type from part size * count.
2372 EVT SlotVT;
2373 MVT PartVT;
2374 unsigned NumParts = 1;
2375 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2376 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2377 else
2378 SlotVT = Outs[I].VT;
2379 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2380 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2381
2382 MachinePointerInfo StackPtrInfo =
2384 MemOpChains.push_back(
2385 DAG.getStore(Chain, DL, ArgValue, SpillSlot, StackPtrInfo));
2386 // If the original argument was split (e.g. i128), we need
2387 // to store all parts of it here (and pass just one address).
2388 assert(Outs[I].PartOffset == 0);
 // Note: I is advanced here so the outer loop skips the extra parts.
2389 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2390 ++I;
2391 SDValue PartValue = OutVals[I];
2392 unsigned PartOffset = Outs[I].PartOffset;
2393 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2394 DAG.getIntPtrConstant(PartOffset, DL));
2395 MemOpChains.push_back(
2396 DAG.getStore(Chain, DL, PartValue, Address,
2397 StackPtrInfo.getWithOffset(PartOffset)));
2398 assert(PartOffset && "Offset should be non-zero.");
2399 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2400 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2401 }
2402 ArgValue = SpillSlot;
2403 } else
2404 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2405
2406 if (VA.isRegLoc()) {
2407 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2408 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2409 // and low values.
2410 if (VA.getLocVT() == MVT::i128)
2411 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2412 // Queue up the argument copies and emit them at the end.
2413 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2414 } else {
2415 assert(VA.isMemLoc() && "Argument not register or memory");
2416
2417 // Work out the address of the stack slot. Unpromoted ints and
2418 // floats are passed as right-justified 8-byte values.
2419 if (!StackPtr.getNode())
2420 StackPtr = DAG.getCopyFromReg(Chain, DL,
2421 Regs->getStackPointerRegister(), PtrVT);
2422 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2423 VA.getLocMemOffset();
 // Right-justify sub-8-byte scalars within their 8-byte slot.
2424 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2425 Offset += 4;
2426 else if (VA.getLocVT() == MVT::f16)
2427 Offset += 6;
2428 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2430
2431 // Emit the store.
2432 MemOpChains.push_back(
2433 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2434
2435 // Although long doubles or vectors are passed through the stack when
2436 // they are vararg (non-fixed arguments), if a long double or vector
2437 // occupies the third and fourth slot of the argument list GPR3 should
2438 // still shadow the third slot of the argument list.
2439 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2440 SDValue ShadowArgValue =
2441 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2442 DAG.getIntPtrConstant(1, DL));
2443 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2444 }
2445 }
2446 }
2447
2448 // Join the stores, which are independent of one another.
2449 if (!MemOpChains.empty())
2450 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2451
2452 // Accept direct calls by converting symbolic call addresses to the
2453 // associated Target* opcodes. Force %r1 to be used for indirect
2454 // tail calls.
2455 SDValue Glue;
2456
2457 if (Subtarget.isTargetXPLINK64()) {
2458 SDValue ADA;
2459 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2460 if (!IsBRASL) {
 // Indirect call: the entry point must sit in the designated
 // address-of-callee register.
2461 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2462 ->getAddressOfCalleeRegister();
2463 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2464 Glue = Chain.getValue(1);
2465 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2466 }
2467 RegsToPass.push_back(std::make_pair(
2468 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2469 } else {
2470 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2471 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2472 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2473 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2474 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2475 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2476 } else if (IsTailCall) {
2477 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2478 Glue = Chain.getValue(1);
2479 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2480 }
2481 }
2482
2483 // Build a sequence of copy-to-reg nodes, chained and glued together.
2484 for (const auto &[Reg, N] : RegsToPass) {
2485 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2486 Glue = Chain.getValue(1);
2487 }
2488
2489 // The first call operand is the chain and the second is the target address.
2491 Ops.push_back(Chain);
2492 Ops.push_back(Callee);
2493
2494 // Add argument registers to the end of the list so that they are
2495 // known live into the call.
2496 for (const auto &[Reg, N] : RegsToPass)
2497 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2498
2499 // Add a register mask operand representing the call-preserved registers.
2500 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2501 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2502 assert(Mask && "Missing call preserved mask for calling convention");
2503 Ops.push_back(DAG.getRegisterMask(Mask));
2504
2505 // Glue the call to the argument copies, if any.
2506 if (Glue.getNode())
2507 Ops.push_back(Glue);
2508
2509 // Emit the call.
2510 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2511 if (IsTailCall) {
 // Sibling call: no CALLSEQ_END and no result copies are needed.
2512 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2513 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2514 return Ret;
2515 }
2516 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2517 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2518 Glue = Chain.getValue(1);
2519
2520 // Mark the end of the call, which is glued to the call itself.
2521 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2522 Glue = Chain.getValue(1);
2523
2524 // Assign locations to each value returned by this call.
2526 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2527 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2528
2529 // Copy all of the result registers out of their specified physreg.
2530 for (CCValAssign &VA : RetLocs) {
2531 // Copy the value out, gluing the copy to the end of the call sequence.
2532 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2533 VA.getLocVT(), Glue);
2534 Chain = RetValue.getValue(1);
2535 Glue = RetValue.getValue(2);
2536
2537 // Convert the value of the return register into the value that's
2538 // being returned.
2539 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2540 }
2541
2542 return Chain;
2543}
2544
2545// Generate a call taking the given operands as arguments and returning a
2546// result of type RetVT.
// Each operand's sign/zero-extension attribute is derived from IsSigned via
// shouldSignExtendTypeInLibCall, as is the extension of the result.
// NOTE(review): the signature's first line and the Args/Entry/CLI
// declarations were lost in extraction; confirm against upstream.
2548 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2549 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2550 bool DoesNotReturn, bool IsReturnValueUsed) const {
2552 Args.reserve(Ops.size());
2553
 // Translate each SDValue operand into an argument list entry with the
 // appropriate extension flags.
2554 for (SDValue Op : Ops) {
2556 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2557 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2558 Entry.IsZExt = !Entry.IsSExt;
2559 Args.push_back(Entry);
2560 }
2561
2562 SDValue Callee =
2563 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2564
2565 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2567 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2568 CLI.setDebugLoc(DL)
2569 .setChain(Chain)
2570 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2571 .setNoReturn(DoesNotReturn)
2572 .setDiscardResult(!IsReturnValueUsed)
2573 .setSExtResult(SignExtend)
2574 .setZExtResult(!SignExtend);
2575 return LowerCallTo(CLI);
2576}
2577
// CanLowerReturn: report whether the given return values can be returned
// directly (in registers) under RetCC_SystemZ, rejecting scalar integers
// wider than 64 bits up front.
// NOTE(review): the signature's first line and the RetLocs declaration were
// lost in extraction; confirm against upstream.
2579 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2580 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2581 const Type *RetTy) const {
2582 // Special case that we cannot easily detect in RetCC_SystemZ since
2583 // i128 may not be a legal type.
2584 for (auto &Out : Outs)
2585 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2586 return false;
2587
 // Defer everything else to the generated calling-convention checker.
2589 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2590 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2591}
2592
// LowerReturn: copy the outgoing return values into their assigned physical
// registers and emit a SystemZISD::RET_GLUE node.
// NOTE(review): parts of the signature and the MF/RetLocs/RetOps
// declarations were lost in extraction; confirm against upstream.
2593SDValue
2595 bool IsVarArg,
2597 const SmallVectorImpl<SDValue> &OutVals,
2598 const SDLoc &DL, SelectionDAG &DAG) const {
2600
2601 // Integer args <=32 bits should have an extension attribute.
2602 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2603
2604 // Assign locations to each returned value.
2606 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2607 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2608
2609 // Quick exit for void returns
2610 if (RetLocs.empty())
2611 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2612
2613 if (CallConv == CallingConv::GHC)
2614 report_fatal_error("GHC functions return void only");
2615
2616 // Copy the result values into the output registers.
2617 SDValue Glue;
2619 RetOps.push_back(Chain);
2620 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2621 CCValAssign &VA = RetLocs[I];
2622 SDValue RetValue = OutVals[I];
2623
2624 // Make the return register live on exit.
2625 assert(VA.isRegLoc() && "Can only return in registers!");
2626
2627 // Promote the value as required.
2628 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2629
2630 // Chain and glue the copies together.
2631 Register Reg = VA.getLocReg();
2632 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2633 Glue = Chain.getValue(1);
2634 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2635 }
2636
2637 // Update chain and glue.
2638 RetOps[0] = Chain;
2639 if (Glue.getNode())
2640 RetOps.push_back(Glue);
2641
2642 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2643}
2644
2645// Return true if Op is an intrinsic node with chain that returns the CC value
2646// as its only (other) argument. Provide the associated SystemZISD opcode and
2647// the mask of valid CC values if so.
2648static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2649 unsigned &CCValid) {
2650 unsigned Id = Op.getConstantOperandVal(1);
2651 switch (Id) {
2652 case Intrinsic::s390_tbegin:
2653 Opcode = SystemZISD::TBEGIN;
2654 CCValid = SystemZ::CCMASK_TBEGIN;
2655 return true;
2656
2657 case Intrinsic::s390_tbegin_nofloat:
2658 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2659 CCValid = SystemZ::CCMASK_TBEGIN;
2660 return true;
2661
2662 case Intrinsic::s390_tend:
2663 Opcode = SystemZISD::TEND;
2664 CCValid = SystemZ::CCMASK_TEND;
2665 return true;
2666
2667 default:
2668 return false;
2669 }
2670}
2671
2672// Return true if Op is an intrinsic node without chain that returns the
2673// CC value as its final argument. Provide the associated SystemZISD
2674// opcode and the mask of valid CC values if so.
2675static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2676 unsigned Id = Op.getConstantOperandVal(0);
2677 switch (Id) {
2678 case Intrinsic::s390_vpkshs:
2679 case Intrinsic::s390_vpksfs:
2680 case Intrinsic::s390_vpksgs:
2681 Opcode = SystemZISD::PACKS_CC;
2682 CCValid = SystemZ::CCMASK_VCMP;
2683 return true;
2684
2685 case Intrinsic::s390_vpklshs:
2686 case Intrinsic::s390_vpklsfs:
2687 case Intrinsic::s390_vpklsgs:
2688 Opcode = SystemZISD::PACKLS_CC;
2689 CCValid = SystemZ::CCMASK_VCMP;
2690 return true;
2691
2692 case Intrinsic::s390_vceqbs:
2693 case Intrinsic::s390_vceqhs:
2694 case Intrinsic::s390_vceqfs:
2695 case Intrinsic::s390_vceqgs:
2696 case Intrinsic::s390_vceqqs:
2697 Opcode = SystemZISD::VICMPES;
2698 CCValid = SystemZ::CCMASK_VCMP;
2699 return true;
2700
2701 case Intrinsic::s390_vchbs:
2702 case Intrinsic::s390_vchhs:
2703 case Intrinsic::s390_vchfs:
2704 case Intrinsic::s390_vchgs:
2705 case Intrinsic::s390_vchqs:
2706 Opcode = SystemZISD::VICMPHS;
2707 CCValid = SystemZ::CCMASK_VCMP;
2708 return true;
2709
2710 case Intrinsic::s390_vchlbs:
2711 case Intrinsic::s390_vchlhs:
2712 case Intrinsic::s390_vchlfs:
2713 case Intrinsic::s390_vchlgs:
2714 case Intrinsic::s390_vchlqs:
2715 Opcode = SystemZISD::VICMPHLS;
2716 CCValid = SystemZ::CCMASK_VCMP;
2717 return true;
2718
2719 case Intrinsic::s390_vtm:
2720 Opcode = SystemZISD::VTM;
2721 CCValid = SystemZ::CCMASK_VCMP;
2722 return true;
2723
2724 case Intrinsic::s390_vfaebs:
2725 case Intrinsic::s390_vfaehs:
2726 case Intrinsic::s390_vfaefs:
2727 Opcode = SystemZISD::VFAE_CC;
2728 CCValid = SystemZ::CCMASK_ANY;
2729 return true;
2730
2731 case Intrinsic::s390_vfaezbs:
2732 case Intrinsic::s390_vfaezhs:
2733 case Intrinsic::s390_vfaezfs:
2734 Opcode = SystemZISD::VFAEZ_CC;
2735 CCValid = SystemZ::CCMASK_ANY;
2736 return true;
2737
2738 case Intrinsic::s390_vfeebs:
2739 case Intrinsic::s390_vfeehs:
2740 case Intrinsic::s390_vfeefs:
2741 Opcode = SystemZISD::VFEE_CC;
2742 CCValid = SystemZ::CCMASK_ANY;
2743 return true;
2744
2745 case Intrinsic::s390_vfeezbs:
2746 case Intrinsic::s390_vfeezhs:
2747 case Intrinsic::s390_vfeezfs:
2748 Opcode = SystemZISD::VFEEZ_CC;
2749 CCValid = SystemZ::CCMASK_ANY;
2750 return true;
2751
2752 case Intrinsic::s390_vfenebs:
2753 case Intrinsic::s390_vfenehs:
2754 case Intrinsic::s390_vfenefs:
2755 Opcode = SystemZISD::VFENE_CC;
2756 CCValid = SystemZ::CCMASK_ANY;
2757 return true;
2758
2759 case Intrinsic::s390_vfenezbs:
2760 case Intrinsic::s390_vfenezhs:
2761 case Intrinsic::s390_vfenezfs:
2762 Opcode = SystemZISD::VFENEZ_CC;
2763 CCValid = SystemZ::CCMASK_ANY;
2764 return true;
2765
2766 case Intrinsic::s390_vistrbs:
2767 case Intrinsic::s390_vistrhs:
2768 case Intrinsic::s390_vistrfs:
2769 Opcode = SystemZISD::VISTR_CC;
2771 return true;
2772
2773 case Intrinsic::s390_vstrcbs:
2774 case Intrinsic::s390_vstrchs:
2775 case Intrinsic::s390_vstrcfs:
2776 Opcode = SystemZISD::VSTRC_CC;
2777 CCValid = SystemZ::CCMASK_ANY;
2778 return true;
2779
2780 case Intrinsic::s390_vstrczbs:
2781 case Intrinsic::s390_vstrczhs:
2782 case Intrinsic::s390_vstrczfs:
2783 Opcode = SystemZISD::VSTRCZ_CC;
2784 CCValid = SystemZ::CCMASK_ANY;
2785 return true;
2786
2787 case Intrinsic::s390_vstrsb:
2788 case Intrinsic::s390_vstrsh:
2789 case Intrinsic::s390_vstrsf:
2790 Opcode = SystemZISD::VSTRS_CC;
2791 CCValid = SystemZ::CCMASK_ANY;
2792 return true;
2793
2794 case Intrinsic::s390_vstrszb:
2795 case Intrinsic::s390_vstrszh:
2796 case Intrinsic::s390_vstrszf:
2797 Opcode = SystemZISD::VSTRSZ_CC;
2798 CCValid = SystemZ::CCMASK_ANY;
2799 return true;
2800
2801 case Intrinsic::s390_vfcedbs:
2802 case Intrinsic::s390_vfcesbs:
2803 Opcode = SystemZISD::VFCMPES;
2804 CCValid = SystemZ::CCMASK_VCMP;
2805 return true;
2806
2807 case Intrinsic::s390_vfchdbs:
2808 case Intrinsic::s390_vfchsbs:
2809 Opcode = SystemZISD::VFCMPHS;
2810 CCValid = SystemZ::CCMASK_VCMP;
2811 return true;
2812
2813 case Intrinsic::s390_vfchedbs:
2814 case Intrinsic::s390_vfchesbs:
2815 Opcode = SystemZISD::VFCMPHES;
2816 CCValid = SystemZ::CCMASK_VCMP;
2817 return true;
2818
2819 case Intrinsic::s390_vftcidb:
2820 case Intrinsic::s390_vftcisb:
2821 Opcode = SystemZISD::VFTCI;
2822 CCValid = SystemZ::CCMASK_VCMP;
2823 return true;
2824
2825 case Intrinsic::s390_tdc:
2826 Opcode = SystemZISD::TDC;
2827 CCValid = SystemZ::CCMASK_TDC;
2828 return true;
2829
2830 default:
2831 return false;
2832 }
2833}
2834
2835// Emit an intrinsic with chain and an explicit CC register result.
// Rebuilds the INTRINSIC_W_CHAIN node as the target-specific Opcode with
// (i32 CC, chain) results and rewires chain users to the new node.
// NOTE(review): the signature's first line and the Ops declaration were lost
// in extraction; confirm against upstream.
2837 unsigned Opcode) {
2838 // Copy all operands except the intrinsic ID.
2839 unsigned NumOps = Op.getNumOperands();
2841 Ops.reserve(NumOps - 1);
 // Operand 0 is the chain; operand 1 (the intrinsic ID) is skipped.
2842 Ops.push_back(Op.getOperand(0));
2843 for (unsigned I = 2; I < NumOps; ++I)
2844 Ops.push_back(Op.getOperand(I));
2845
2846 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2847 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2848 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
 // Redirect users of the old chain result (value 1) to the new node's chain.
2849 SDValue OldChain = SDValue(Op.getNode(), 1);
2850 SDValue NewChain = SDValue(Intr.getNode(), 1);
2851 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2852 return Intr.getNode();
2853}
2854
2855// Emit an intrinsic with an explicit CC register result.
// Rebuilds the chainless INTRINSIC_WO_CHAIN node as the target-specific
// Opcode, keeping the original value list.
// NOTE(review): the signature's first line and the Ops declaration were lost
// in extraction; confirm against upstream.
2857 unsigned Opcode) {
2858 // Copy all operands except the intrinsic ID.
2859 SDLoc DL(Op);
2860 unsigned NumOps = Op.getNumOperands();
2862 Ops.reserve(NumOps - 1);
2863 for (unsigned I = 1; I < NumOps; ++I) {
2864 SDValue CurrOper = Op.getOperand(I);
 // The only f16 operand handled is the value operand of s390_tdc; widen
 // it to f32 since the underlying operation has no f16 form.
2865 if (CurrOper.getValueType() == MVT::f16) {
2866 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2867 "Unhandled intrinsic with f16 operand.");
2868 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2869 }
2870 Ops.push_back(CurrOper);
2871 }
2872
2873 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2874 return Intr.getNode();
2875}
2876
2877// CC is a comparison that will be implemented using an integer or
2878// floating-point comparison. Return the condition code mask for
2879// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2880// unsigned comparisons and clear for signed ones. In the floating-point
2881// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2883#define CONV(X) \
2884 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2885 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2886 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2887
2888 switch (CC) {
2889 default:
2890 llvm_unreachable("Invalid integer condition!");
2891
2892 CONV(EQ);
2893 CONV(NE);
2894 CONV(GT);
2895 CONV(GE);
2896 CONV(LT);
2897 CONV(LE);
2898
2899 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2901 }
2902#undef CONV
2903}
2904
2905// If C can be converted to a comparison against zero, adjust the operands
2906// as necessary.
2907static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2908 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2909 return;
2910
2911 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2912 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2913 return;
2914
2915 int64_t Value = ConstOp1->getSExtValue();
2916 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2917 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2918 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2919 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2920 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2921 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2922 }
2923}
2924
2925// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2926// adjust the operands as necessary.
2927static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2928 Comparison &C) {
2929 // For us to make any changes, it must a comparison between a single-use
2930 // load and a constant.
2931 if (!C.Op0.hasOneUse() ||
2932 C.Op0.getOpcode() != ISD::LOAD ||
2933 C.Op1.getOpcode() != ISD::Constant)
2934 return;
2935
2936 // We must have an 8- or 16-bit load.
2937 auto *Load = cast<LoadSDNode>(C.Op0);
2938 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2939 if ((NumBits != 8 && NumBits != 16) ||
2940 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2941 return;
2942
2943 // The load must be an extending one and the constant must be within the
2944 // range of the unextended value.
2945 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2946 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2947 return;
2948 uint64_t Value = ConstOp1->getZExtValue();
 // Mask of the unextended memory value (NumBits is 8 or 16, so the shift
 // cannot overflow).
2949 uint64_t Mask = (1 << NumBits) - 1;
2950 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2951 // Make sure that ConstOp1 is in range of C.Op0.
2952 int64_t SignedValue = ConstOp1->getSExtValue();
2953 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2954 return;
2955 if (C.ICmpType != SystemZICMP::SignedOnly) {
2956 // Unsigned comparison between two sign-extended values is equivalent
2957 // to unsigned comparison between two zero-extended values.
2958 Value &= Mask;
2959 } else if (NumBits == 8) {
2960 // Try to treat the comparison as unsigned, so that we can use CLI.
2961 // Adjust CCMask and Value as necessary.
2962 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2963 // Test whether the high bit of the byte is set.
2964 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2965 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2966 // Test whether the high bit of the byte is clear.
2967 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2968 else
2969 // No instruction exists for this combination.
2970 return;
2971 C.ICmpType = SystemZICMP::UnsignedOnly;
2972 }
2973 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2974 if (Value > Mask)
2975 return;
2976 // If the constant is in range, we can use any comparison.
2977 C.ICmpType = SystemZICMP::Any;
2978 } else
2979 return;
2980
2981 // Make sure that the first operand is an i32 of the right extension type.
 // NOTE(review): the continuation of this conditional expression
 // (selecting SEXTLOAD vs. ZEXTLOAD) was lost in extraction.
2982 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2985 if (C.Op0.getValueType() != MVT::i32 ||
2986 Load->getExtensionType() != ExtType) {
2987 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2988 Load->getBasePtr(), Load->getPointerInfo(),
2989 Load->getMemoryVT(), Load->getAlign(),
2990 Load->getMemOperand()->getFlags());
2991 // Update the chain uses.
2992 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2993 }
2994
2995 // Make sure that the second operand is an i32 with the right value.
2996 if (C.Op1.getValueType() != MVT::i32 ||
2997 Value != ConstOp1->getZExtValue())
2998 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2999}
3000
3001// Return true if Op is either an unextended load, or a load suitable
3002// for integer register-memory comparisons of type ICmpType.
3003static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
3004 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
3005 if (Load) {
3006 // There are no instructions to compare a register with a memory byte.
3007 if (Load->getMemoryVT() == MVT::i8)
3008 return false;
3009 // Otherwise decide on extension type.
3010 switch (Load->getExtensionType()) {
3011 case ISD::NON_EXTLOAD:
3012 return true;
3013 case ISD::SEXTLOAD:
3014 return ICmpType != SystemZICMP::UnsignedOnly;
3015 case ISD::ZEXTLOAD:
3016 return ICmpType != SystemZICMP::SignedOnly;
3017 default:
3018 break;
3019 }
3020 }
3021 return false;
3022}
3023
3024// Return true if it is better to swap the operands of C.
3025static bool shouldSwapCmpOperands(const Comparison &C) {
3026 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3027 if (C.Op0.getValueType() == MVT::i128)
3028 return false;
3029 if (C.Op0.getValueType() == MVT::f128)
3030 return false;
3031
3032 // Always keep a floating-point constant second, since comparisons with
3033 // zero can use LOAD TEST and comparisons with other constants make a
3034 // natural memory operand.
3035 if (isa<ConstantFPSDNode>(C.Op1))
3036 return false;
3037
3038 // Never swap comparisons with zero since there are many ways to optimize
3039 // those later.
3040 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3041 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3042 return false;
3043
3044 // Also keep natural memory operands second if the loaded value is
3045 // only used here. Several comparisons have memory forms.
3046 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3047 return false;
3048
3049 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3050 // In that case we generally prefer the memory to be second.
3051 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3052 // The only exceptions are when the second operand is a constant and
3053 // we can use things like CHHSI.
3054 if (!ConstOp1)
3055 return true;
3056 // The unsigned memory-immediate instructions can handle 16-bit
3057 // unsigned integers.
3058 if (C.ICmpType != SystemZICMP::SignedOnly &&
3059 isUInt<16>(ConstOp1->getZExtValue()))
3060 return false;
3061 // The signed memory-immediate instructions can handle 16-bit
3062 // signed integers.
3063 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3064 isInt<16>(ConstOp1->getSExtValue()))
3065 return false;
3066 return true;
3067 }
3068
3069 // Try to promote the use of CGFR and CLGFR.
3070 unsigned Opcode0 = C.Op0.getOpcode();
3071 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3072 return true;
3073 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3074 return true;
3075 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3076 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3077 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3078 return true;
3079
3080 return false;
3081}
3082
3083// Check whether C tests for equality between X and Y and whether X - Y
3084// or Y - X is also computed. In that case it's better to compare the
3085// result of the subtraction against zero.
// NOTE(review): the function's signature line was lost in extraction;
// confirm against upstream.
3087 Comparison &C) {
3088 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3089 C.CCMask == SystemZ::CCMASK_CMP_NE) {
 // Look for an existing SUB of the two compared values (either order).
3090 for (SDNode *N : C.Op0->users()) {
3091 if (N->getOpcode() == ISD::SUB &&
3092 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3093 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3094 // Disable the nsw and nuw flags: the backend needs to handle
3095 // overflow as well during comparison elimination.
3096 N->dropFlags(SDNodeFlags::NoWrap);
3097 C.Op0 = SDValue(N, 0);
3098 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3099 return;
3100 }
3101 }
3102 }
3103}
3104
3105// Check whether C compares a floating-point value with zero and if that
3106// floating-point value is also negated. In this case we can use the
3107// negation to set CC, so avoiding separate LOAD AND TEST and
3108// LOAD (NEGATIVE/COMPLEMENT) instructions.
3109static void adjustForFNeg(Comparison &C) {
3110 // This optimization is invalid for strict comparisons, since FNEG
3111 // does not raise any exceptions.
3112 if (C.Chain)
3113 return;
3114 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3115 if (C1 && C1->isZero()) {
3116 for (SDNode *N : C.Op0->users()) {
3117 if (N->getOpcode() == ISD::FNEG) {
3118 C.Op0 = SDValue(N, 0);
3119 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3120 return;
3121 }
3122 }
3123 }
3124}
3125
3126// Check whether C compares (shl X, 32) with 0 and whether X is
3127// also sign-extended. In that case it is better to test the result
3128// of the sign extension using LTGFR.
3129//
3130// This case is important because InstCombine transforms a comparison
3131// with (sext (trunc X)) into a comparison with (shl X, 32).
3132static void adjustForLTGFR(Comparison &C) {
3133 // Check for a comparison between (shl X, 32) and 0.
3134 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3135 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3136 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3137 if (C1 && C1->getZExtValue() == 32) {
3138 SDValue ShlOp0 = C.Op0.getOperand(0);
3139 // See whether X has any SIGN_EXTEND_INREG uses.
3140 for (SDNode *N : ShlOp0->users()) {
3141 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3142 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3143 C.Op0 = SDValue(N, 0);
3144 return;
3145 }
3146 }
3147 }
3148 }
3149}
3150
3151// If C compares the truncation of an extending load, try to compare
3152// the untruncated value instead. This exposes more opportunities to
3153// reuse CC.
3154static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3155 Comparison &C) {
3156 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3157 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3158 C.Op1.getOpcode() == ISD::Constant &&
3159 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3160 C.Op1->getAsZExtVal() == 0) {
3161 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3162 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3163 C.Op0.getValueSizeInBits().getFixedValue()) {
3164 unsigned Type = L->getExtensionType();
3165 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3166 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3167 C.Op0 = C.Op0.getOperand(0);
3168 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3169 }
3170 }
3171 }
3172}
3173
3174// Return true if shift operation N has an in-range constant shift value.
3175// Store it in ShiftVal if so.
3176static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3177 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3178 if (!Shift)
3179 return false;
3180
3181 uint64_t Amount = Shift->getZExtValue();
3182 if (Amount >= N.getValueSizeInBits())
3183 return false;
3184
3185 ShiftVal = Amount;
3186 return true;
3187}
3188
3189// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3190// instruction and whether the CC value is descriptive enough to handle
3191// a comparison of type Opcode between the AND result and CmpVal.
3192// CCMask says which comparison result is being tested and BitSize is
3193// the number of bits in the operands. If TEST UNDER MASK can be used,
3194// return the corresponding CC mask, otherwise return 0.
3195static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3196 uint64_t Mask, uint64_t CmpVal,
3197 unsigned ICmpType) {
3198 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3199
3200 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3201 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3202 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3203 return 0;
3204
3205 // Work out the masks for the lowest and highest bits.
3207 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3208
3209 // Signed ordered comparisons are effectively unsigned if the sign
3210 // bit is dropped.
3211 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3212
3213 // Check for equality comparisons with 0, or the equivalent.
3214 if (CmpVal == 0) {
3215 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3217 if (CCMask == SystemZ::CCMASK_CMP_NE)
3219 }
3220 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3221 if (CCMask == SystemZ::CCMASK_CMP_LT)
3223 if (CCMask == SystemZ::CCMASK_CMP_GE)
3225 }
3226 if (EffectivelyUnsigned && CmpVal < Low) {
3227 if (CCMask == SystemZ::CCMASK_CMP_LE)
3229 if (CCMask == SystemZ::CCMASK_CMP_GT)
3231 }
3232
3233 // Check for equality comparisons with the mask, or the equivalent.
3234 if (CmpVal == Mask) {
3235 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3237 if (CCMask == SystemZ::CCMASK_CMP_NE)
3239 }
3240 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3241 if (CCMask == SystemZ::CCMASK_CMP_GT)
3243 if (CCMask == SystemZ::CCMASK_CMP_LE)
3245 }
3246 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3247 if (CCMask == SystemZ::CCMASK_CMP_GE)
3249 if (CCMask == SystemZ::CCMASK_CMP_LT)
3251 }
3252
3253 // Check for ordered comparisons with the top bit.
3254 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3255 if (CCMask == SystemZ::CCMASK_CMP_LE)
3257 if (CCMask == SystemZ::CCMASK_CMP_GT)
3259 }
3260 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3261 if (CCMask == SystemZ::CCMASK_CMP_LT)
3263 if (CCMask == SystemZ::CCMASK_CMP_GE)
3265 }
3266
3267 // If there are just two bits, we can do equality checks for Low and High
3268 // as well.
3269 if (Mask == Low + High) {
3270 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3272 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3274 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3276 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3278 }
3279
3280 // Looks like we've exhausted our options.
3281 return 0;
3282}
3283
3284// See whether C can be implemented as a TEST UNDER MASK instruction.
3285// Update the arguments with the TM version if so.
3287 Comparison &C) {
3288 // Use VECTOR TEST UNDER MASK for i128 operations.
3289 if (C.Op0.getValueType() == MVT::i128) {
3290 // We can use VTM for EQ/NE comparisons of x & y against 0.
3291 if (C.Op0.getOpcode() == ISD::AND &&
3292 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3293 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3294 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3295 if (Mask && Mask->getAPIntValue() == 0) {
3296 C.Opcode = SystemZISD::VTM;
3297 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3298 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3299 C.CCValid = SystemZ::CCMASK_VCMP;
3300 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3301 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3302 else
3303 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3304 }
3305 }
3306 return;
3307 }
3308
3309 // Check that we have a comparison with a constant.
3310 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3311 if (!ConstOp1)
3312 return;
3313 uint64_t CmpVal = ConstOp1->getZExtValue();
3314
3315 // Check whether the nonconstant input is an AND with a constant mask.
3316 Comparison NewC(C);
3317 uint64_t MaskVal;
3318 ConstantSDNode *Mask = nullptr;
3319 if (C.Op0.getOpcode() == ISD::AND) {
3320 NewC.Op0 = C.Op0.getOperand(0);
3321 NewC.Op1 = C.Op0.getOperand(1);
3322 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3323 if (!Mask)
3324 return;
3325 MaskVal = Mask->getZExtValue();
3326 } else {
3327 // There is no instruction to compare with a 64-bit immediate
3328 // so use TMHH instead if possible. We need an unsigned ordered
3329 // comparison with an i64 immediate.
3330 if (NewC.Op0.getValueType() != MVT::i64 ||
3331 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3332 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3333 NewC.ICmpType == SystemZICMP::SignedOnly)
3334 return;
3335 // Convert LE and GT comparisons into LT and GE.
3336 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3337 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3338 if (CmpVal == uint64_t(-1))
3339 return;
3340 CmpVal += 1;
3341 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3342 }
3343 // If the low N bits of Op1 are zero than the low N bits of Op0 can
3344 // be masked off without changing the result.
3345 MaskVal = -(CmpVal & -CmpVal);
3346 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3347 }
3348 if (!MaskVal)
3349 return;
3350
3351 // Check whether the combination of mask, comparison value and comparison
3352 // type are suitable.
3353 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3354 unsigned NewCCMask, ShiftVal;
3355 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3356 NewC.Op0.getOpcode() == ISD::SHL &&
3357 isSimpleShift(NewC.Op0, ShiftVal) &&
3358 (MaskVal >> ShiftVal != 0) &&
3359 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3360 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3361 MaskVal >> ShiftVal,
3362 CmpVal >> ShiftVal,
3363 SystemZICMP::Any))) {
3364 NewC.Op0 = NewC.Op0.getOperand(0);
3365 MaskVal >>= ShiftVal;
3366 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3367 NewC.Op0.getOpcode() == ISD::SRL &&
3368 isSimpleShift(NewC.Op0, ShiftVal) &&
3369 (MaskVal << ShiftVal != 0) &&
3370 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3371 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3372 MaskVal << ShiftVal,
3373 CmpVal << ShiftVal,
3375 NewC.Op0 = NewC.Op0.getOperand(0);
3376 MaskVal <<= ShiftVal;
3377 } else {
3378 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3379 NewC.ICmpType);
3380 if (!NewCCMask)
3381 return;
3382 }
3383
3384 // Go ahead and make the change.
3385 C.Opcode = SystemZISD::TM;
3386 C.Op0 = NewC.Op0;
3387 if (Mask && Mask->getZExtValue() == MaskVal)
3388 C.Op1 = SDValue(Mask, 0);
3389 else
3390 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3391 C.CCValid = SystemZ::CCMASK_TM;
3392 C.CCMask = NewCCMask;
3393}
3394
3395// Implement i128 comparison in vector registers.
3396static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3397 Comparison &C) {
3398 if (C.Opcode != SystemZISD::ICMP)
3399 return;
3400 if (C.Op0.getValueType() != MVT::i128)
3401 return;
3402
3403 // Recognize vector comparison reductions.
3404 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3405 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3406 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3407 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3408 bool CmpNull = isNullConstant(C.Op1);
3409 SDValue Src = peekThroughBitcasts(C.Op0);
3410 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3411 Src = Src.getOperand(0);
3412 CmpNull = !CmpNull;
3413 }
3414 unsigned Opcode = 0;
3415 if (Src.hasOneUse()) {
3416 switch (Src.getOpcode()) {
3417 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3418 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3419 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3420 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3421 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3422 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3423 default: break;
3424 }
3425 }
3426 if (Opcode) {
3427 C.Opcode = Opcode;
3428 C.Op0 = Src->getOperand(0);
3429 C.Op1 = Src->getOperand(1);
3430 C.CCValid = SystemZ::CCMASK_VCMP;
3432 if (!CmpEq)
3433 C.CCMask ^= C.CCValid;
3434 return;
3435 }
3436 }
3437
3438 // Everything below here is not useful if we have native i128 compares.
3439 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3440 return;
3441
3442 // (In-)Equality comparisons can be implemented via VCEQGS.
3443 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3444 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3445 C.Opcode = SystemZISD::VICMPES;
3446 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3447 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3448 C.CCValid = SystemZ::CCMASK_VCMP;
3449 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3450 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3451 else
3452 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3453 return;
3454 }
3455
3456 // Normalize other comparisons to GT.
3457 bool Swap = false, Invert = false;
3458 switch (C.CCMask) {
3459 case SystemZ::CCMASK_CMP_GT: break;
3460 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3461 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3462 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3463 default: llvm_unreachable("Invalid integer condition!");
3464 }
3465 if (Swap)
3466 std::swap(C.Op0, C.Op1);
3467
3468 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3469 C.Opcode = SystemZISD::UCMP128HI;
3470 else
3471 C.Opcode = SystemZISD::SCMP128HI;
3472 C.CCValid = SystemZ::CCMASK_ANY;
3473 C.CCMask = SystemZ::CCMASK_1;
3474
3475 if (Invert)
3476 C.CCMask ^= C.CCValid;
3477}
3478
3479// See whether the comparison argument contains a redundant AND
3480// and remove it if so. This sometimes happens due to the generic
3481// BRCOND expansion.
3483 Comparison &C) {
3484 if (C.Op0.getOpcode() != ISD::AND)
3485 return;
3486 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3487 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3488 return;
3489 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3490 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3491 return;
3492
3493 C.Op0 = C.Op0.getOperand(0);
3494}
3495
3496// Return a Comparison that tests the condition-code result of intrinsic
3497// node Call against constant integer CC using comparison code Cond.
3498// Opcode is the opcode of the SystemZISD operation for the intrinsic
3499// and CCValid is the set of possible condition-code results.
3500static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3501 SDValue Call, unsigned CCValid, uint64_t CC,
3503 Comparison C(Call, SDValue(), SDValue());
3504 C.Opcode = Opcode;
3505 C.CCValid = CCValid;
3506 if (Cond == ISD::SETEQ)
3507 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3508 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3509 else if (Cond == ISD::SETNE)
3510 // ...and the inverse of that.
3511 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3512 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3513 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3514 // always true for CC>3.
3515 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3516 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3517 // ...and the inverse of that.
3518 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3519 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3520 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3521 // always true for CC>3.
3522 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3523 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3524 // ...and the inverse of that.
3525 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3526 else
3527 llvm_unreachable("Unexpected integer comparison type");
3528 C.CCMask &= CCValid;
3529 return C;
3530}
3531
3532// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
3533static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3534 ISD::CondCode Cond, const SDLoc &DL,
3535 SDValue Chain = SDValue(),
3536 bool IsSignaling = false) {
3537 if (CmpOp1.getOpcode() == ISD::Constant) {
3538 assert(!Chain);
3539 unsigned Opcode, CCValid;
3540 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3541 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3542 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3543 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3544 CmpOp1->getAsZExtVal(), Cond);
3545 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3546 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3547 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3548 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3549 CmpOp1->getAsZExtVal(), Cond);
3550 }
3551 Comparison C(CmpOp0, CmpOp1, Chain);
3552 C.CCMask = CCMaskForCondCode(Cond);
3553 if (C.Op0.getValueType().isFloatingPoint()) {
3554 C.CCValid = SystemZ::CCMASK_FCMP;
3555 if (!C.Chain)
3556 C.Opcode = SystemZISD::FCMP;
3557 else if (!IsSignaling)
3558 C.Opcode = SystemZISD::STRICT_FCMP;
3559 else
3560 C.Opcode = SystemZISD::STRICT_FCMPS;
3562 } else {
3563 assert(!C.Chain);
3564 C.CCValid = SystemZ::CCMASK_ICMP;
3565 C.Opcode = SystemZISD::ICMP;
3566 // Choose the type of comparison. Equality and inequality tests can
3567 // use either signed or unsigned comparisons. The choice also doesn't
3568 // matter if both sign bits are known to be clear. In those cases we
3569 // want to give the main isel code the freedom to choose whichever
3570 // form fits best.
3571 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3572 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3573 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3574 C.ICmpType = SystemZICMP::Any;
3575 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3576 C.ICmpType = SystemZICMP::UnsignedOnly;
3577 else
3578 C.ICmpType = SystemZICMP::SignedOnly;
3579 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3580 adjustForRedundantAnd(DAG, DL, C);
3581 adjustZeroCmp(DAG, DL, C);
3582 adjustSubwordCmp(DAG, DL, C);
3583 adjustForSubtraction(DAG, DL, C);
3585 adjustICmpTruncate(DAG, DL, C);
3586 }
3587
3588 if (shouldSwapCmpOperands(C)) {
3589 std::swap(C.Op0, C.Op1);
3590 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3591 }
3592
3594 adjustICmp128(DAG, DL, C);
3595 return C;
3596}
3597
3598// Emit the comparison instruction described by C.
3599static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3600 if (!C.Op1.getNode()) {
3601 SDNode *Node;
3602 switch (C.Op0.getOpcode()) {
3604 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3605 return SDValue(Node, 0);
3607 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3608 return SDValue(Node, Node->getNumValues() - 1);
3609 default:
3610 llvm_unreachable("Invalid comparison operands");
3611 }
3612 }
3613 if (C.Opcode == SystemZISD::ICMP)
3614 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3615 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3616 if (C.Opcode == SystemZISD::TM) {
3617 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3619 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3620 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3621 }
3622 if (C.Opcode == SystemZISD::VICMPES ||
3623 C.Opcode == SystemZISD::VICMPHS ||
3624 C.Opcode == SystemZISD::VICMPHLS ||
3625 C.Opcode == SystemZISD::VFCMPES ||
3626 C.Opcode == SystemZISD::VFCMPHS ||
3627 C.Opcode == SystemZISD::VFCMPHES) {
3628 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3629 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3630 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3631 return SDValue(Val.getNode(), 1);
3632 }
3633 if (C.Chain) {
3634 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3635 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3636 }
3637 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3638}
3639
3640// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3641// 64 bits. Extend is the extension type to use. Store the high part
3642// in Hi and the low part in Lo.
3643static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3644 SDValue Op0, SDValue Op1, SDValue &Hi,
3645 SDValue &Lo) {
3646 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3647 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3648 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3649 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3650 DAG.getConstant(32, DL, MVT::i64));
3651 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3652 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3653}
3654
3655// Lower a binary operation that produces two VT results, one in each
3656// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3657// and Opcode performs the GR128 operation. Store the even register result
3658// in Even and the odd register result in Odd.
3659static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3660 unsigned Opcode, SDValue Op0, SDValue Op1,
3661 SDValue &Even, SDValue &Odd) {
3662 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3663 bool Is32Bit = is32Bit(VT);
3664 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3665 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3666}
3667
3668// Return an i32 value that is 1 if the CC value produced by CCReg is
3669// in the mask CCMask and 0 otherwise. CC is known to have a value
3670// in CCValid, so other values can be ignored.
3671static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3672 unsigned CCValid, unsigned CCMask) {
3673 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3674 DAG.getConstant(0, DL, MVT::i32),
3675 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3676 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3677 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3678}
3679
3680// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
3681// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3682// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3683// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3684// floating-point comparisons.
3687 switch (CC) {
3688 case ISD::SETOEQ:
3689 case ISD::SETEQ:
3690 switch (Mode) {
3691 case CmpMode::Int: return SystemZISD::VICMPE;
3692 case CmpMode::FP: return SystemZISD::VFCMPE;
3693 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3694 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3695 }
3696 llvm_unreachable("Bad mode");
3697
3698 case ISD::SETOGE:
3699 case ISD::SETGE:
3700 switch (Mode) {
3701 case CmpMode::Int: return 0;
3702 case CmpMode::FP: return SystemZISD::VFCMPHE;
3703 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3704 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3705 }
3706 llvm_unreachable("Bad mode");
3707
3708 case ISD::SETOGT:
3709 case ISD::SETGT:
3710 switch (Mode) {
3711 case CmpMode::Int: return SystemZISD::VICMPH;
3712 case CmpMode::FP: return SystemZISD::VFCMPH;
3713 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3714 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3715 }
3716 llvm_unreachable("Bad mode");
3717
3718 case ISD::SETUGT:
3719 switch (Mode) {
3720 case CmpMode::Int: return SystemZISD::VICMPHL;
3721 case CmpMode::FP: return 0;
3722 case CmpMode::StrictFP: return 0;
3723 case CmpMode::SignalingFP: return 0;
3724 }
3725 llvm_unreachable("Bad mode");
3726
3727 default:
3728 return 0;
3729 }
3730}
3731
3732// Return the SystemZISD vector comparison operation for CC or its inverse,
3733// or 0 if neither can be done directly. Indicate in Invert whether the
3734// result is for the inverse of CC. Mode is as above.
3736 bool &Invert) {
3737 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3738 Invert = false;
3739 return Opcode;
3740 }
3741
3742 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3743 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3744 Invert = true;
3745 return Opcode;
3746 }
3747
3748 return 0;
3749}
3750
3751// Return a v2f64 that contains the extended form of elements Start and Start+1
3752// of v4f32 value Op. If Chain is nonnull, return the strict form.
3753static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3754 SDValue Op, SDValue Chain) {
3755 int Mask[] = { Start, -1, Start + 1, -1 };
3756 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3757 if (Chain) {
3758 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3759 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3760 }
3761 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3762}
3763
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT. If Chain is nonnull, return the strict form.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1,
                                            SDValue Chain) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    // Widen the high (elements 0,1) and low (elements 2,3) pairs of
    // each operand into v2f64 values.
    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
    if (Chain) {
      // Strict form: both compares carry the incoming chain, and all
      // intermediate chains are merged with a TokenFactor.
      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
      SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
      SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
      // PACK narrows the two v2i64 results back into a single VT mask.
      SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
      SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
                            H1.getValue(1), L1.getValue(1),
                            HRes.getValue(1), LRes.getValue(1) };
      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
      SDValue Ops[2] = { Res, NewChain };
      return DAG.getMergeValues(Ops, DL);
    }
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
  }
  // Direct comparison; the strict form additionally returns a chain.
  if (Chain) {
    SDVTList VTs = DAG.getVTList(VT, MVT::Other);
    return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
  }
  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}
3802
3803// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3804// an integer mask of type VT. If Chain is nonnull, we have a strict
3805// floating-point comparison. If in addition IsSignaling is true, we have
3806// a strict signaling floating-point comparison.
3807SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3808 const SDLoc &DL, EVT VT,
3809 ISD::CondCode CC,
3810 SDValue CmpOp0,
3811 SDValue CmpOp1,
3812 SDValue Chain,
3813 bool IsSignaling) const {
3814 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3815 assert (!Chain || IsFP);
3816 assert (!IsSignaling || Chain);
3817 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3818 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3819 bool Invert = false;
3820 SDValue Cmp;
3821 switch (CC) {
3822 // Handle tests for order using (or (ogt y x) (oge x y)).
3823 case ISD::SETUO:
3824 Invert = true;
3825 [[fallthrough]];
3826 case ISD::SETO: {
3827 assert(IsFP && "Unexpected integer comparison");
3828 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3829 DL, VT, CmpOp1, CmpOp0, Chain);
3830 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3831 DL, VT, CmpOp0, CmpOp1, Chain);
3832 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3833 if (Chain)
3834 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3835 LT.getValue(1), GE.getValue(1));
3836 break;
3837 }
3838
3839 // Handle <> tests using (or (ogt y x) (ogt x y)).
3840 case ISD::SETUEQ:
3841 Invert = true;
3842 [[fallthrough]];
3843 case ISD::SETONE: {
3844 assert(IsFP && "Unexpected integer comparison");
3845 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3846 DL, VT, CmpOp1, CmpOp0, Chain);
3847 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3848 DL, VT, CmpOp0, CmpOp1, Chain);
3849 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3850 if (Chain)
3851 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3852 LT.getValue(1), GT.getValue(1));
3853 break;
3854 }
3855
3856 // Otherwise a single comparison is enough. It doesn't really
3857 // matter whether we try the inversion or the swap first, since
3858 // there are no cases where both work.
3859 default:
3860 // Optimize sign-bit comparisons to signed compares.
3861 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3863 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3864 APInt Mask;
3865 if (CmpOp0.getOpcode() == ISD::AND
3866 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3867 && Mask == APInt::getSignMask(EltSize)) {
3868 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3869 CmpOp0 = CmpOp0.getOperand(0);
3870 }
3871 }
3872 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3873 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3874 else {
3876 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3877 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3878 else
3879 llvm_unreachable("Unhandled comparison");
3880 }
3881 if (Chain)
3882 Chain = Cmp.getValue(1);
3883 break;
3884 }
3885 if (Invert) {
3886 SDValue Mask =
3887 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3888 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3889 }
3890 if (Chain && Chain.getNode() != Cmp.getNode()) {
3891 SDValue Ops[2] = { Cmp, Chain };
3892 Cmp = DAG.getMergeValues(Ops, DL);
3893 }
3894 return Cmp;
3895}
3896
3897SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3898 SelectionDAG &DAG) const {
3899 SDValue CmpOp0 = Op.getOperand(0);
3900 SDValue CmpOp1 = Op.getOperand(1);
3901 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3902 SDLoc DL(Op);
3903 EVT VT = Op.getValueType();
3904 if (VT.isVector())
3905 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3906
3907 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3908 SDValue CCReg = emitCmp(DAG, DL, C);
3909 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3910}
3911
SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
                                                  SelectionDAG &DAG,
                                                  bool IsSignaling) const {
  // Operands: chain, LHS, RHS, condition code.
  SDValue Chain = Op.getOperand(0);
  SDValue CmpOp0 = Op.getOperand(1);
  SDValue CmpOp1 = Op.getOperand(2);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
  SDLoc DL(Op);
  EVT VT = Op.getNode()->getValueType(0);
  if (VT.isVector()) {
    // The vector lowering returns {mask, chain}; forward whichever
    // result value the caller asked for.
    SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
                                   Chain, IsSignaling);
    return Res.getValue(Op.getResNo());
  }

  // Scalar case: emit the strict compare, carry over the node flags
  // from the original operation, and return {0/1 result, chain}.
  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
  SDValue CCReg = emitCmp(DAG, DL, C);
  CCReg->setFlags(Op->getFlags());
  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
  SDValue Ops[2] = { Result, CCReg.getValue(1) };
  return DAG.getMergeValues(Ops, DL);
}
3934
3935SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3936 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3937 SDValue CmpOp0 = Op.getOperand(2);
3938 SDValue CmpOp1 = Op.getOperand(3);
3939 SDValue Dest = Op.getOperand(4);
3940 SDLoc DL(Op);
3941
3942 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3943 SDValue CCReg = emitCmp(DAG, DL, C);
3944 return DAG.getNode(
3945 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3946 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3947 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3948}
3949
3950// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3951// allowing Pos and Neg to be wider than CmpOp.
3952static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3953 return (Neg.getOpcode() == ISD::SUB &&
3954 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3955 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3956 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3957 Pos.getOperand(0) == CmpOp)));
3958}
3959
3960// Return the absolute or negative absolute of Op; IsNegative decides which.
3962 bool IsNegative) {
3963 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3964 if (IsNegative)
3965 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3966 DAG.getConstant(0, DL, Op.getValueType()), Op);
3967 return Op;
3968}
3969
3971 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3972 EVT VT = MVT::i128;
3973 unsigned Op;
3974
3975 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3976 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3977 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3978 std::swap(TrueOp, FalseOp);
3979 C.CCMask ^= C.CCValid;
3980 }
3981 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3982 std::swap(C.Op0, C.Op1);
3983 C.CCMask = SystemZ::CCMASK_CMP_GT;
3984 }
3985 switch (C.CCMask) {
3987 Op = SystemZISD::VICMPE;
3988 break;
3990 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3991 Op = SystemZISD::VICMPHL;
3992 else
3993 Op = SystemZISD::VICMPH;
3994 break;
3995 default:
3996 llvm_unreachable("Unhandled comparison");
3997 break;
3998 }
3999
4000 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
4001 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
4002 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
4003 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
4004}
4005
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Operands: LHS, RHS, value if true, value if false, condition code.
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  // SELECT_CC involving f16 will not have the cmp-ops promoted by the
  // legalizer, as it will be handled according to the type of the resulting
  // value. Extend them here if needed.
  if (CmpOp0.getSimpleValueType() == MVT::f16) {
    CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
    CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
  }

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    // select (x <op> 0), x, -x  and  select (x <op> 0), -x, x patterns.
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  // i128 selects on integer compares can be done in vector registers.
  if (Subtarget.hasVectorEnhancements3() &&
      C.Opcode == SystemZISD::ICMP &&
      C.Op0.getValueType() == MVT::i128 &&
      TrueOp.getValueType() == MVT::i128) {
    return getI128Select(DAG, DL, C, TrueOp, FalseOp);
  }

  // Generic case: emit the compare and select on the CC mask.
  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp,
                   DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
4053
// Lower a GlobalAddressSDNode.  Symbols addressable with PC-relative LARL
// get an anchored PCREL_WRAPPER; otherwise the address comes from the GOT
// (ELF) or the ADA (z/OS).
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // NOTE(review): extraction dropped lines here that should declare Result
  // and the code model CM used below -- confirm against upstream.

  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    if (isInt<32>(Offset)) {
      // Assign anchors at 1<<12 byte boundaries.
      uint64_t Anchor = Offset & ~uint64_t(0xfff);
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

      // The offset can be folded into the address if it is aligned to a
      // halfword.
      Offset -= Anchor;
      if (Offset != 0 && (Offset & 1) == 0) {
        SDValue Full =
            DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
        Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
        Offset = 0;
      }
    } else {
      // Conservatively load a constant offset greater than 32 bits into a
      // register below.
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    }
  } else if (Subtarget.isTargetELF()) {
    // ELF: load the address of the symbol from its GOT slot.
    // NOTE(review): extraction dropped the continuation of this getLoad call
    // (its MachinePointerInfo operand) -- confirm against upstream.
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
  } else if (Subtarget.isTargetzOS()) {
    // z/OS: address comes from the associated data area (ADA).
    Result = getADAEntry(DAG, GV, DL, PtrVT);
  } else
    llvm_unreachable("Unexpected Subtarget");

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getSignedConstant(Offset, DL, PtrVT));

  return Result;
}
4103
// Emit a call to __tls_get_offset (used for general-dynamic and
// local-dynamic TLS) and return the value it produces in %r2.
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // NOTE(review): extraction dropped the GHC calling-convention check that
  // should guard this error -- confirm against upstream.
    report_fatal_error("In GHC calling convention TLS is not supported");

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  // NOTE(review): extraction dropped the declaration of the operand vector
  // Ops used below -- confirm against upstream.
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}
4154
4155SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4156 SelectionDAG &DAG) const {
4157 SDValue Chain = DAG.getEntryNode();
4158 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4159
4160 // The high part of the thread pointer is in access register 0.
4161 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4162 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4163
4164 // The low part of the thread pointer is in access register 1.
4165 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4166 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4167
4168 // Merge them into a single 64-bit address.
4169 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4170 DAG.getConstant(32, DL, PtrVT));
4171 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4172}
4173
// Lower a thread-local GlobalAddress according to the selected TLS model,
// producing thread-pointer + offset.
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  // NOTE(review): extraction dropped several lines in this function (the GHC
  // calling-convention guard, the Offset declaration, the switch case
  // labels, and the SystemZConstantPoolValue::Create calls).  The NOTE
  // markers below flag the gaps -- confirm against upstream.
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  switch (model) {
    // General-dynamic (case label dropped by extraction):
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
    break;
  }

    // Local-dynamic (case label dropped by extraction):
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset. Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();

    // Add the per-symbol offset.

    SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    DTPOffset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), DTPOffset,

    Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
    Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
    Offset =
        DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}
4266
4267SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4268 SelectionDAG &DAG) const {
4269 SDLoc DL(Node);
4270 const BlockAddress *BA = Node->getBlockAddress();
4271 int64_t Offset = Node->getOffset();
4272 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4273
4274 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4275 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4276 return Result;
4277}
4278
4279SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4280 SelectionDAG &DAG) const {
4281 SDLoc DL(JT);
4282 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4283 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4284
4285 // Use LARL to load the address of the table.
4286 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4287}
4288
// Lower a constant-pool reference; the entry address is formed PC-relative
// via LARL.
SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // NOTE(review): extraction dropped the Result declaration and the
  // machine-constant-pool-entry check that should precede this branch --
  // confirm against upstream.
    Result =
        DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
                                       CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}
4305
4306SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4307 SelectionDAG &DAG) const {
4308 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4309 MachineFunction &MF = DAG.getMachineFunction();
4310 MachineFrameInfo &MFI = MF.getFrameInfo();
4311 MFI.setFrameAddressIsTaken(true);
4312
4313 SDLoc DL(Op);
4314 unsigned Depth = Op.getConstantOperandVal(0);
4315 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4316
4317 // By definition, the frame address is the address of the back chain. (In
4318 // the case of packed stack without backchain, return the address where the
4319 // backchain would have been stored. This will either be an unused space or
4320 // contain a saved register).
4321 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4322 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4323
4324 if (Depth > 0) {
4325 // FIXME The frontend should detect this case.
4326 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4327 report_fatal_error("Unsupported stack frame traversal count");
4328
4329 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4330 while (Depth--) {
4331 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4332 MachinePointerInfo());
4333 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4334 }
4335 }
4336
4337 return BackChain;
4338}
4339
// Lower llvm.returnaddress.  Depth 0 reads the link register; deeper frames
// are reached via the frame address (backchain) plus the return-address
// slot offset.
SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  if (Depth > 0) {
    // FIXME The frontend should detect this case.
    if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
      report_fatal_error("Unsupported stack frame traversal count");

    // Load the return address from its slot relative to the requested
    // frame's address.
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
    int Offset = TFL->getReturnAddressOffset(MF);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, PtrVT));
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
                       MachinePointerInfo());
  }

  // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
  // implicit live-in.
  SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
  // NOTE(review): extraction dropped the MF.addLiveIn(...) call that defines
  // LinkReg here -- confirm against upstream.
                                 &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}
4371
4372SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4373 SelectionDAG &DAG) const {
4374 SDLoc DL(Op);
4375 SDValue In = Op.getOperand(0);
4376 EVT InVT = In.getValueType();
4377 EVT ResVT = Op.getValueType();
4378
4379 // Convert loads directly. This is normally done by DAGCombiner,
4380 // but we need this case for bitcasts that are created during lowering
4381 // and which are then lowered themselves.
4382 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4383 if (ISD::isNormalLoad(LoadN)) {
4384 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4385 LoadN->getBasePtr(), LoadN->getMemOperand());
4386 // Update the chain uses.
4387 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4388 return NewLoad;
4389 }
4390
4391 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4392 SDValue In64;
4393 if (Subtarget.hasHighWord()) {
4394 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4395 MVT::i64);
4396 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4397 MVT::i64, SDValue(U64, 0), In);
4398 } else {
4399 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4400 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4401 DAG.getConstant(32, DL, MVT::i64));
4402 }
4403 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4404 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4405 DL, MVT::f32, Out64);
4406 }
4407 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4408 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4409 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4410 MVT::f64, SDValue(U64, 0), In);
4411 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4412 if (Subtarget.hasHighWord())
4413 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4414 MVT::i32, Out64);
4415 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4416 DAG.getConstant(32, DL, MVT::i64));
4417 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4418 }
4419 llvm_unreachable("Unexpected bitcast combination");
4420}
4421
4422SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4423 SelectionDAG &DAG) const {
4424
4425 if (Subtarget.isTargetXPLINK64())
4426 return lowerVASTART_XPLINK(Op, DAG);
4427 else
4428 return lowerVASTART_ELF(Op, DAG);
4429}
4430
4431SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4432 SelectionDAG &DAG) const {
4433 MachineFunction &MF = DAG.getMachineFunction();
4434 SystemZMachineFunctionInfo *FuncInfo =
4435 MF.getInfo<SystemZMachineFunctionInfo>();
4436
4437 SDLoc DL(Op);
4438
4439 // vastart just stores the address of the VarArgsFrameIndex slot into the
4440 // memory location argument.
4441 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4442 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4443 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4444 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4445 MachinePointerInfo(SV));
4446}
4447
// ELF va_start: materialize the four va_list fields (first GPR index, first
// FPR index, overflow-area address, register-save-area address) and store
// each into its 8-byte slot.
SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
                                                SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    // NOTE(review): extraction dropped the continuation of this ADD (the
    // constant Offset operand) -- confirm against upstream.
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
4483
4484SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4485 SelectionDAG &DAG) const {
4486 SDValue Chain = Op.getOperand(0);
4487 SDValue DstPtr = Op.getOperand(1);
4488 SDValue SrcPtr = Op.getOperand(2);
4489 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4490 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4491 SDLoc DL(Op);
4492
4493 uint32_t Sz =
4494 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4495 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4496 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4497 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4498 MachinePointerInfo(SrcSV));
4499}
4500
4501SDValue
4502SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4503 SelectionDAG &DAG) const {
4504 if (Subtarget.isTargetXPLINK64())
4505 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4506 else
4507 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4508}
4509
4510SDValue
4511SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4512 SelectionDAG &DAG) const {
4513 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4514 MachineFunction &MF = DAG.getMachineFunction();
4515 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4516 SDValue Chain = Op.getOperand(0);
4517 SDValue Size = Op.getOperand(1);
4518 SDValue Align = Op.getOperand(2);
4519 SDLoc DL(Op);
4520
4521 // If user has set the no alignment function attribute, ignore
4522 // alloca alignments.
4523 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4524
4525 uint64_t StackAlign = TFI->getStackAlignment();
4526 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4527 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4528
4529 SDValue NeededSpace = Size;
4530
4531 // Add extra space for alignment if needed.
4532 EVT PtrVT = getPointerTy(MF.getDataLayout());
4533 if (ExtraAlignSpace)
4534 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4535 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4536
4537 bool IsSigned = false;
4538 bool DoesNotReturn = false;
4539 bool IsReturnValueUsed = false;
4540 EVT VT = Op.getValueType();
4541 SDValue AllocaCall =
4542 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4543 CallingConv::C, IsSigned, DL, DoesNotReturn,
4544 IsReturnValueUsed)
4545 .first;
4546
4547 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4548 // to end of call in order to ensure it isn't broken up from the call
4549 // sequence.
4550 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4551 Register SPReg = Regs.getStackPointerRegister();
4552 Chain = AllocaCall.getValue(1);
4553 SDValue Glue = AllocaCall.getValue(2);
4554 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4555 Chain = NewSPRegNode.getValue(1);
4556
4557 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4558 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4559 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4560
4561 // Dynamically realign if needed.
4562 if (ExtraAlignSpace) {
4563 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4564 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4565 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4566 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4567 }
4568
4569 SDValue Ops[2] = {Result, Chain};
4570 return DAG.getMergeValues(Ops, DL);
4571}
4572
// ELF dynamic stack allocation: bump the stack pointer (or emit a probed
// allocation), preserve the backchain if present, and realign the result
// when the alloca requires more than the stack alignment.
SDValue
SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
                                                   SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();

  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  // NOTE(review): extraction dropped the declaration of SPReg that should
  // appear here -- confirm against upstream.
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
                            MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP;
  if (hasInlineStackProbe(MF)) {
    // Stack-clash protection: emit a probed allocation node instead of a
    // plain subtraction.
    NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
                        DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
    Chain = NewSP.getValue(1);
  }
  else {
    NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
    // Copy the new stack pointer back.
    Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
  }

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments. We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
      DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
      DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
                         MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}
4647
4648SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4649 SDValue Op, SelectionDAG &DAG) const {
4650 SDLoc DL(Op);
4651
4652 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4653}
4654
4655SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4656 SelectionDAG &DAG,
4657 unsigned Opcode) const {
4658 EVT VT = Op.getValueType();
4659 SDLoc DL(Op);
4660 SDValue Even, Odd;
4661
4662 // This custom expander is only used on z17 and later for 64-bit types.
4663 assert(!is32Bit(VT));
4664 assert(Subtarget.hasMiscellaneousExtensions2());
4665
4666 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4667 // the high result in the even register. Return the latter.
4668 lowerGR128Binary(DAG, DL, VT, Opcode,
4669 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4670 return Even;
4671}
4672
4673SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4674 SelectionDAG &DAG) const {
4675 EVT VT = Op.getValueType();
4676 SDLoc DL(Op);
4677 SDValue Ops[2];
4678 if (is32Bit(VT))
4679 // Just do a normal 64-bit multiplication and extract the results.
4680 // We define this so that it can be used for constant division.
4681 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4682 Op.getOperand(1), Ops[1], Ops[0]);
4683 else if (Subtarget.hasMiscellaneousExtensions2())
4684 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4685 // the high result in the even register. ISD::SMUL_LOHI is defined to
4686 // return the low half first, so the results are in reverse order.
4687 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4688 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4689 else {
4690 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4691 //
4692 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4693 //
4694 // but using the fact that the upper halves are either all zeros
4695 // or all ones:
4696 //
4697 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4698 //
4699 // and grouping the right terms together since they are quicker than the
4700 // multiplication:
4701 //
4702 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4703 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4704 SDValue LL = Op.getOperand(0);
4705 SDValue RL = Op.getOperand(1);
4706 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4707 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4708 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4709 // the high result in the even register. ISD::SMUL_LOHI is defined to
4710 // return the low half first, so the results are in reverse order.
4711 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4712 LL, RL, Ops[1], Ops[0]);
4713 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4714 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4715 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4716 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4717 }
4718 return DAG.getMergeValues(Ops, DL);
4719}
4720
4721SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4722 SelectionDAG &DAG) const {
4723 EVT VT = Op.getValueType();
4724 SDLoc DL(Op);
4725 SDValue Ops[2];
4726 if (is32Bit(VT))
4727 // Just do a normal 64-bit multiplication and extract the results.
4728 // We define this so that it can be used for constant division.
4729 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4730 Op.getOperand(1), Ops[1], Ops[0]);
4731 else
4732 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4733 // the high result in the even register. ISD::UMUL_LOHI is defined to
4734 // return the low half first, so the results are in reverse order.
4735 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4736 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4737 return DAG.getMergeValues(Ops, DL);
4738}
4739
4740SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4741 SelectionDAG &DAG) const {
4742 SDValue Op0 = Op.getOperand(0);
4743 SDValue Op1 = Op.getOperand(1);
4744 EVT VT = Op.getValueType();
4745 SDLoc DL(Op);
4746
4747 // We use DSGF for 32-bit division. This means the first operand must
4748 // always be 64-bit, and the second operand should be 32-bit whenever
4749 // that is possible, to improve performance.
4750 if (is32Bit(VT))
4751 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4752 else if (DAG.ComputeNumSignBits(Op1) > 32)
4753 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4754
4755 // DSG(F) returns the remainder in the even register and the
4756 // quotient in the odd register.
4757 SDValue Ops[2];
4758 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4759 return DAG.getMergeValues(Ops, DL);
4760}
4761
4762SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4763 SelectionDAG &DAG) const {
4764 EVT VT = Op.getValueType();
4765 SDLoc DL(Op);
4766
4767 // DL(G) returns the remainder in the even register and the
4768 // quotient in the odd register.
4769 SDValue Ops[2];
4770 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4771 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4772 return DAG.getMergeValues(Ops, DL);
4773}
4774
4775SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4776 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4777
4778 // Get the known-zero masks for each operand.
4779 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4780 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4781 DAG.computeKnownBits(Ops[1])};
4782
4783 // See if the upper 32 bits of one operand and the lower 32 bits of the
4784 // other are known zero. They are the low and high operands respectively.
4785 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4786 Known[1].Zero.getZExtValue() };
4787 unsigned High, Low;
4788 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4789 High = 1, Low = 0;
4790 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4791 High = 0, Low = 1;
4792 else
4793 return Op;
4794
4795 SDValue LowOp = Ops[Low];
4796 SDValue HighOp = Ops[High];
4797
4798 // If the high part is a constant, we're better off using IILH.
4799 if (HighOp.getOpcode() == ISD::Constant)
4800 return Op;
4801
4802 // If the low part is a constant that is outside the range of LHI,
4803 // then we're better off using IILF.
4804 if (LowOp.getOpcode() == ISD::Constant) {
4805 int64_t Value = int32_t(LowOp->getAsZExtVal());
4806 if (!isInt<16>(Value))
4807 return Op;
4808 }
4809
4810 // Check whether the high part is an AND that doesn't change the
4811 // high 32 bits and just masks out low bits. We can skip it if so.
4812 if (HighOp.getOpcode() == ISD::AND &&
4813 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4814 SDValue HighOp0 = HighOp.getOperand(0);
4815 uint64_t Mask = HighOp.getConstantOperandVal(1);
4816 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4817 HighOp = HighOp0;
4818 }
4819
4820 // Take advantage of the fact that all GR32 operations only change the
4821 // low 32 bits by truncating Low to an i32 and inserting it directly
4822 // using a subreg. The interesting cases are those where the truncation
4823 // can be folded.
4824 SDLoc DL(Op);
4825 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4826 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4827 MVT::i64, HighOp, Low32);
4828}
4829
// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);

  // i128 overflow ops are lowered to a vector add/sub plus an explicit
  // carry/borrow computation (VACC / VSCBI).
  if (N->getValueType(0) == MVT::i128) {
    unsigned BaseOp = 0;
    unsigned FlagOp = 0;
    bool IsBorrow = false;
    switch (Op.getOpcode()) {
    default: llvm_unreachable("Unknown instruction!");
    case ISD::UADDO:
      BaseOp = ISD::ADD;
      FlagOp = SystemZISD::VACC;
      break;
    case ISD::USUBO:
      BaseOp = ISD::SUB;
      FlagOp = SystemZISD::VSCBI;
      IsBorrow = true;
      break;
    }
    SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
    SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
    // The flag value is asserted to fit in one bit before being narrowed to
    // the node's overflow type.
    Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
                       DAG.getValueType(MVT::i1));
    Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
    // For subtraction the flag is inverted to turn the borrow indication
    // into the overflow result callers expect.
    if (IsBorrow)
      Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
                         Flag, DAG.getConstant(1, DL, Flag.getValueType()));
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
  }

  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  // NOTE(review): extraction dropped the CCMask assignments inside the
  // cases below -- confirm against upstream.
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    break;
  }

  // Emit the arithmetic node with a glued CC output, then convert the CC
  // value into the boolean overflow result.
  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
4902
4903static bool isAddCarryChain(SDValue Carry) {
4904 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4905 Carry->getValueType(0) != MVT::i128)
4906 Carry = Carry.getOperand(2);
4907 return Carry.getOpcode() == ISD::UADDO &&
4908 Carry->getValueType(0) != MVT::i128;
4909}
4910
4911static bool isSubBorrowChain(SDValue Carry) {
4912 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4913 Carry->getValueType(0) != MVT::i128)
4914 Carry = Carry.getOperand(2);
4915 return Carry.getOpcode() == ISD::USUBO &&
4916 Carry->getValueType(0) != MVT::i128;
4917}
4918
4919// Lower UADDO_CARRY/USUBO_CARRY nodes.
4920SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4921 SelectionDAG &DAG) const {
4922
4923 SDNode *N = Op.getNode();
4924 MVT VT = N->getSimpleValueType(0);
4925
4926 // Let legalize expand this if it isn't a legal type yet.
4927 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4928 return SDValue();
4929
4930 SDValue LHS = N->getOperand(0);
4931 SDValue RHS = N->getOperand(1);
4932 SDValue Carry = Op.getOperand(2);
4933 SDLoc DL(N);
4934
4935 if (VT == MVT::i128) {
4936 unsigned BaseOp = 0;
4937 unsigned FlagOp = 0;
4938 bool IsBorrow = false;
4939 switch (Op.getOpcode()) {
4940 default: llvm_unreachable("Unknown instruction!");
4941 case ISD::UADDO_CARRY:
4942 BaseOp = SystemZISD::VAC;
4943 FlagOp = SystemZISD::VACCC;
4944 break;
4945 case ISD::USUBO_CARRY:
4946 BaseOp = SystemZISD::VSBI;
4947 FlagOp = SystemZISD::VSBCBI;
4948 IsBorrow = true;
4949 break;
4950 }
4951 if (IsBorrow)
4952 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4953 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4954 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4955 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4956 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4957 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4958 DAG.getValueType(MVT::i1));
4959 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4960 if (IsBorrow)
4961 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4962 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4963 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4964 }
4965
4966 unsigned BaseOp = 0;
4967 unsigned CCValid = 0;
4968 unsigned CCMask = 0;
4969
4970 switch (Op.getOpcode()) {
4971 default: llvm_unreachable("Unknown instruction!");
4972 case ISD::UADDO_CARRY:
4973 if (!isAddCarryChain(Carry))
4974 return SDValue();
4975
4976 BaseOp = SystemZISD::ADDCARRY;
4977 CCValid = SystemZ::CCMASK_LOGICAL;
4979 break;
4980 case ISD::USUBO_CARRY:
4981 if (!isSubBorrowChain(Carry))
4982 return SDValue();
4983
4984 BaseOp = SystemZISD::SUBCARRY;
4985 CCValid = SystemZ::CCMASK_LOGICAL;
4987 break;
4988 }
4989
4990 // Set the condition code from the carry flag.
4991 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4992 DAG.getConstant(CCValid, DL, MVT::i32),
4993 DAG.getConstant(CCMask, DL, MVT::i32));
4994
4995 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4996 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4997
4998 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4999 if (N->getValueType(1) == MVT::i1)
5000 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
5001
5002 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
5003}
5004
// Lower CTPOP.  Vector types use the VPOPCT instruction (plus shifts or
// VSUM reductions to widen the per-byte counts); scalar types use POPCNT
// on i64 followed by a shift-and-add reduction tree.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // i128 (or v1i128): count per doubleword, then sum the two halves
  // into the low element with VSUM against a zero vector.
  if (VT.getScalarSizeInBits() == 128) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
    Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
    SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
                                          DAG.getConstant(0, DL, MVT::i64));
    Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
    return Op;
  }

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    // VPOPCT counts bits per byte; widen the byte counts to the element size.
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      // Byte counts are already in the right place.
      break;
    case 16: {
      // Add the two byte counts of each halfword, then shift the total
      // down into the low byte of the element.
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      // VSUM adds the four byte counts of each word.
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      // Two VSUM steps: bytes -> words, then words -> doublewords.
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
  if (NumSignificantBits == 0)
    // All bits known zero: the population count is trivially 0.
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      // Mask off counts shifted beyond the truncated width.
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
5087
5088SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5089 SelectionDAG &DAG) const {
5090 SDLoc DL(Op);
5091 AtomicOrdering FenceOrdering =
5092 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5093 SyncScope::ID FenceSSID =
5094 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5095
5096 // The only fence that needs an instruction is a sequentially-consistent
5097 // cross-thread fence.
5098 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5099 FenceSSID == SyncScope::System) {
5100 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5101 Op.getOperand(0)),
5102 0);
5103 }
5104
5105 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5106 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5107}
5108
5109SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5110 SelectionDAG &DAG) const {
5111 EVT RegVT = Op.getValueType();
5112 if (RegVT.getSizeInBits() == 128)
5113 return lowerATOMIC_LDST_I128(Op, DAG);
5114 return lowerLoadF16(Op, DAG);
5115}
5116
5117SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5118 SelectionDAG &DAG) const {
5119 auto *Node = cast<AtomicSDNode>(Op.getNode());
5120 if (Node->getMemoryVT().getSizeInBits() == 128)
5121 return lowerATOMIC_LDST_I128(Op, DAG);
5122 return lowerStoreF16(Op, DAG);
5123}
5124
5125SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5126 SelectionDAG &DAG) const {
5127 auto *Node = cast<AtomicSDNode>(Op.getNode());
5128 assert(
5129 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5130 "Only custom lowering i128 or f128.");
5131 // Use same code to handle both legal and non-legal i128 types.
5133 LowerOperationWrapper(Node, Results, DAG);
5134 return DAG.getMergeValues(Results, SDLoc(Op));
5135}
5136
5137// Prepare for a Compare And Swap for a subword operation. This needs to be
5138// done in memory with 4 bytes at natural alignment.
5140 SDValue &AlignedAddr, SDValue &BitShift,
5141 SDValue &NegBitShift) {
5142 EVT PtrVT = Addr.getValueType();
5143 EVT WideVT = MVT::i32;
5144
5145 // Get the address of the containing word.
5146 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5147 DAG.getSignedConstant(-4, DL, PtrVT));
5148
5149 // Get the number of bits that the word must be rotated left in order
5150 // to bring the field to the top bits of a GR32.
5151 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5152 DAG.getConstant(3, DL, PtrVT));
5153 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5154
5155 // Get the complementing shift amount, for rotating a field in the top
5156 // bits back to its proper position.
5157 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5158 DAG.getConstant(0, DL, WideVT), BitShift);
5159
5160}
5161
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no special handling.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
                                   Src2.getValueType());
    }

  // Compute the aligned word address and the rotate amounts that bring
  // the subword field to/from the top bits of a GR32.
  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  // Return both the rotated value and the chain from the atomic op.
  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
5222
5223// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5224// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5225SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 auto *Node = cast<AtomicSDNode>(Op.getNode());
5228 EVT MemVT = Node->getMemoryVT();
5229 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5230 // A full-width operation: negate and use LAA(G).
5231 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5232 assert(Subtarget.hasInterlockedAccess1() &&
5233 "Should have been expanded by AtomicExpand pass.");
5234 SDValue Src2 = Node->getVal();
5235 SDLoc DL(Src2);
5236 SDValue NegSrc2 =
5237 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5238 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5239 Node->getChain(), Node->getBasePtr(), NegSrc2,
5240 Node->getMemOperand());
5241 }
5242
5243 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5244}
5245
5246// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5247SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5248 SelectionDAG &DAG) const {
5249 auto *Node = cast<AtomicSDNode>(Op.getNode());
5250 SDValue ChainIn = Node->getOperand(0);
5251 SDValue Addr = Node->getOperand(1);
5252 SDValue CmpVal = Node->getOperand(2);
5253 SDValue SwapVal = Node->getOperand(3);
5254 MachineMemOperand *MMO = Node->getMemOperand();
5255 SDLoc DL(Node);
5256
5257 if (Node->getMemoryVT() == MVT::i128) {
5258 // Use same code to handle both legal and non-legal i128 types.
5260 LowerOperationWrapper(Node, Results, DAG);
5261 return DAG.getMergeValues(Results, DL);
5262 }
5263
5264 // We have native support for 32-bit and 64-bit compare and swap, but we
5265 // still need to expand extracting the "success" result from the CC.
5266 EVT NarrowVT = Node->getMemoryVT();
5267 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5268 if (NarrowVT == WideVT) {
5269 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5270 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5271 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5272 DL, Tys, Ops, NarrowVT, MMO);
5273 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5275
5276 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5277 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5278 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5279 return SDValue();
5280 }
5281
5282 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5283 // via a fullword ATOMIC_CMP_SWAPW operation.
5284 int64_t BitSize = NarrowVT.getSizeInBits();
5285
5286 SDValue AlignedAddr, BitShift, NegBitShift;
5287 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5288
5289 // Construct the ATOMIC_CMP_SWAPW node.
5290 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5291 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5292 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5293 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5294 VTList, Ops, NarrowVT, MMO);
5295 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5297
5298 // emitAtomicCmpSwapW() will zero extend the result (original value).
5299 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5300 DAG.getValueType(NarrowVT));
5301 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5302 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5303 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5304 return SDValue();
5305}
5306
5308SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5309 // Because of how we convert atomic_load and atomic_store to normal loads and
5310 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5311 // since DAGCombine hasn't been updated to account for atomic, but non
5312 // volatile loads. (See D57601)
5313 if (auto *SI = dyn_cast<StoreInst>(&I))
5314 if (SI->isAtomic())
5316 if (auto *LI = dyn_cast<LoadInst>(&I))
5317 if (LI->isAtomic())
5319 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5320 if (AI->isAtomic())
5322 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5323 if (AI->isAtomic())
5326}
5327
5328SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5329 SelectionDAG &DAG) const {
5330 MachineFunction &MF = DAG.getMachineFunction();
5331 auto *Regs = Subtarget.getSpecialRegisters();
5333 report_fatal_error("Variable-sized stack allocations are not supported "
5334 "in GHC calling convention");
5335 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5336 Regs->getStackPointerRegister(), Op.getValueType());
5337}
5338
5339SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5340 SelectionDAG &DAG) const {
5341 MachineFunction &MF = DAG.getMachineFunction();
5342 auto *Regs = Subtarget.getSpecialRegisters();
5343 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5344
5346 report_fatal_error("Variable-sized stack allocations are not supported "
5347 "in GHC calling convention");
5348
5349 SDValue Chain = Op.getOperand(0);
5350 SDValue NewSP = Op.getOperand(1);
5351 SDValue Backchain;
5352 SDLoc DL(Op);
5353
5354 if (StoreBackchain) {
5355 SDValue OldSP = DAG.getCopyFromReg(
5356 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5357 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5358 MachinePointerInfo());
5359 }
5360
5361 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5362
5363 if (StoreBackchain)
5364 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5365 MachinePointerInfo());
5366
5367 return Chain;
5368}
5369
5370SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5371 SelectionDAG &DAG) const {
5372 bool IsData = Op.getConstantOperandVal(4);
5373 if (!IsData)
5374 // Just preserve the chain.
5375 return Op.getOperand(0);
5376
5377 SDLoc DL(Op);
5378 bool IsWrite = Op.getConstantOperandVal(2);
5379 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5380 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5381 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5382 Op.getOperand(1)};
5383 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5384 Node->getVTList(), Ops,
5385 Node->getMemoryVT(), Node->getMemOperand());
5386}
5387
5388SDValue
5389SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5390 SelectionDAG &DAG) const {
5391 unsigned Opcode, CCValid;
5392 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5393 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5394 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5395 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5396 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5397 return SDValue();
5398 }
5399
5400 return SDValue();
5401}
5402
// Lower INTRINSIC_WO_CHAIN.  CC-producing intrinsics are handled generically;
// the remaining vector intrinsics map directly onto SystemZISD (or generic
// ISD) nodes so that later combines can see through them.
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
    if (Op->getNumValues() == 1)
      // Only the CC value is used.
      return getCCResult(DAG, SDValue(Node, 0));
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
                       SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
  }

  unsigned Id = Op.getConstantOperandVal(0);
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  // Permutes.
  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  // Unpacks (sign- and zero-extending, high and low halves).
  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
  case Intrinsic::s390_vuphg:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
  case Intrinsic::s390_vuplhg:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
  case Intrinsic::s390_vuplg:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
  case Intrinsic::s390_vupllg:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  // Element-sum variants all share the VSUM node.
  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  // Quadword add and the carry-computing/carry-consuming forms.
  case Intrinsic::s390_vaq:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vaccb:
  case Intrinsic::s390_vacch:
  case Intrinsic::s390_vaccf:
  case Intrinsic::s390_vaccg:
  case Intrinsic::s390_vaccq:
    return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vacq:
    return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::s390_vacccq:
    return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  // Quadword subtract and the borrow-computing/borrow-consuming forms.
  case Intrinsic::s390_vsq:
    return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vscbib:
  case Intrinsic::s390_vscbih:
  case Intrinsic::s390_vscbif:
  case Intrinsic::s390_vscbig:
  case Intrinsic::s390_vscbiq:
    return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vsbiq:
    return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::s390_vsbcbiq:
    return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  // Multiply-high maps onto the generic MULHS/MULHU nodes.
  case Intrinsic::s390_vmhb:
  case Intrinsic::s390_vmhh:
  case Intrinsic::s390_vmhf:
  case Intrinsic::s390_vmhg:
  case Intrinsic::s390_vmhq:
    return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vmlhb:
  case Intrinsic::s390_vmlhh:
  case Intrinsic::s390_vmlhf:
  case Intrinsic::s390_vmlhg:
  case Intrinsic::s390_vmlhq:
    return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  // Multiply-and-add-high.
  case Intrinsic::s390_vmahb:
  case Intrinsic::s390_vmahh:
  case Intrinsic::s390_vmahf:
  case Intrinsic::s390_vmahg:
  case Intrinsic::s390_vmahq:
    return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::s390_vmalhb:
  case Intrinsic::s390_vmalhh:
  case Intrinsic::s390_vmalhf:
  case Intrinsic::s390_vmalhg:
  case Intrinsic::s390_vmalhq:
    return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  // Widening multiplies of even/odd elements.
  case Intrinsic::s390_vmeb:
  case Intrinsic::s390_vmeh:
  case Intrinsic::s390_vmef:
  case Intrinsic::s390_vmeg:
    return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vmleb:
  case Intrinsic::s390_vmleh:
  case Intrinsic::s390_vmlef:
  case Intrinsic::s390_vmleg:
    return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vmob:
  case Intrinsic::s390_vmoh:
  case Intrinsic::s390_vmof:
  case Intrinsic::s390_vmog:
    return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vmlob:
  case Intrinsic::s390_vmloh:
  case Intrinsic::s390_vmlof:
  case Intrinsic::s390_vmlog:
    return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  // Widening multiply-and-add: expressed as VME/VMLE/VMO/VMLO plus ADD so
  // the multiply can still participate in generic combines.
  case Intrinsic::s390_vmaeb:
  case Intrinsic::s390_vmaeh:
  case Intrinsic::s390_vmaef:
  case Intrinsic::s390_vmaeg:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
                                   Op.getOperand(1), Op.getOperand(2)),
                       Op.getOperand(3));
  case Intrinsic::s390_vmaleb:
  case Intrinsic::s390_vmaleh:
  case Intrinsic::s390_vmalef:
  case Intrinsic::s390_vmaleg:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
                                   Op.getOperand(1), Op.getOperand(2)),
                       Op.getOperand(3));
  case Intrinsic::s390_vmaob:
  case Intrinsic::s390_vmaoh:
  case Intrinsic::s390_vmaof:
  case Intrinsic::s390_vmaog:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
                                   Op.getOperand(1), Op.getOperand(2)),
                       Op.getOperand(3));
  case Intrinsic::s390_vmalob:
  case Intrinsic::s390_vmaloh:
  case Intrinsic::s390_vmalof:
  case Intrinsic::s390_vmalog:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
                                   Op.getOperand(1), Op.getOperand(2)),
                       Op.getOperand(3));
  }

  // Anything else is selected via patterns, not custom lowering.
  return SDValue();
}
5591
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  // The SystemZISD opcode that implements this permutation.
  unsigned Opcode;
  // Constant third operand, or element size in bytes (see above).
  unsigned Operand;
  // The equivalent VPERM selector: result byte I comes from input byte
  // Bytes[I], where 0-15 index operand 0 and 16-31 index operand 1.
  unsigned char Bytes[SystemZ::VectorBytes];
};
} // end anonymous namespace
5603
// The permutation patterns that single SystemZ instructions can implement,
// tried in order by the matchPermute/matchDoublePermute helpers below.
static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
5645
// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  int First = OpNos[0];
  int Second = OpNos[1];
  // If neither pattern operand is constrained, there is nothing to resolve.
  if (First < 0 && Second < 0)
    return false;
  // An unconstrained slot takes the same operand as the constrained one.
  OpNo0 = First >= 0 ? unsigned(First) : unsigned(Second);
  OpNo1 = Second >= 0 ? unsigned(Second) : unsigned(First);
  return true;
}
5666
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if the VPERM can be implemented using P.
// When returning true set OpNo0 to the VPERM operand that should be
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
//
// For example, if swapping the VPERM operands allows P to match, OpNo0
// will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
// operand, but rewriting it to use two duplicated operands allows it to
// match P, then OpNo0 and OpNo1 will be the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  // OpNos[M] records which shuffle operand feeds pattern operand M
  // (-1 while still unconstrained).
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number.  Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
5698
5699// As above, but search for a matching permute.
5700static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5701 unsigned &OpNo0, unsigned &OpNo1) {
5702 for (auto &P : PermuteForms)
5703 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5704 return &P;
5705 return nullptr;
5706}
5707
5708// Bytes is a VPERM-like permute vector, except that -1 is used for
5709// undefined bytes. This permute is an operand of an outer permute.
5710// See whether redistributing the -1 bytes gives a shuffle that can be
5711// implemented using P. If so, set Transform to a VPERM-like permute vector
5712// that, when applied to the result of P, gives the original permute in Bytes.
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  // Walk the requested bytes in order, locating each defined byte within
  // P's result and recording that position in Transform.
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      // Advance To until P produces the byte we need.  To never moves
      // backwards, so Bytes must select bytes in nondecreasing order of
      // their position in P's result; otherwise there is no match.
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}
5733
5734// As above, but search for a matching permute.
5735static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5736 SmallVectorImpl<int> &Transform) {
5737 for (auto &P : PermuteForms)
5738 if (matchDoublePermute(Bytes, P, Transform))
5739 return &P;
5740 return nullptr;
5741}
5742
5743// Convert the mask of the given shuffle op into a byte-level mask,
5744// as if it had type vNi8.
5745static bool getVPermMask(SDValue ShuffleOp,
5746 SmallVectorImpl<int> &Bytes) {
5747 EVT VT = ShuffleOp.getValueType();
5748 unsigned NumElements = VT.getVectorNumElements();
5749 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5750
5751 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5752 Bytes.resize(NumElements * BytesPerElement, -1);
5753 for (unsigned I = 0; I < NumElements; ++I) {
5754 int Index = VSN->getMaskElt(I);
5755 if (Index >= 0)
5756 for (unsigned J = 0; J < BytesPerElement; ++J)
5757 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5758 }
5759 return true;
5760 }
5761 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5762 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5763 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5764 Bytes.resize(NumElements * BytesPerElement, -1);
5765 for (unsigned I = 0; I < NumElements; ++I)
5766 for (unsigned J = 0; J < BytesPerElement; ++J)
5767 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5768 return true;
5769 }
5770 return false;
5771}
5772
5773// Bytes is a VPERM-like permute vector, except that -1 is used for
5774// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5775// the result come from a contiguous sequence of bytes from one input.
5776// Set Base to the selector for the first byte if so.
5777static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5778 unsigned BytesPerElement, int &Base) {
5779 Base = -1;
5780 for (unsigned I = 0; I < BytesPerElement; ++I) {
5781 if (Bytes[Start + I] >= 0) {
5782 unsigned Elem = Bytes[Start + I];
5783 if (Base < 0) {
5784 Base = Elem - I;
5785 // Make sure the bytes would come from one input operand.
5786 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5787 return false;
5788 } else if (unsigned(Base) != Elem - I)
5789 return false;
5790 }
5791 }
5792 return true;
5793}
5794
5795// Bytes is a VPERM-like permute vector, except that -1 is used for
5796// undefined bytes. Return true if it can be performed using VSLDB.
5797// When returning true, set StartIndex to the shift amount and OpNo0
5798// and OpNo1 to the VPERM operands that should be used as the first
5799// and second shift operand respectively.
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  // OpNos[N] records which VPERM operand acts as shift operand N; -1 means
  // not yet decided.  Shift is the common shift amount, or -1 if none has
  // been seen yet.
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      // The shift amount this byte implies, modulo the vector width.
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      // Every defined byte must agree on a single shift amount.
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
5825
5826// Create a node that performs P on operands Op0 and Op1, casting the
5827// operands to the appropriate type. The type of the result is determined by P.
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
                              SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    // The doubleword selector is carried as an immediate operand.
    SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    // PACK narrows: the result elements are half the input element width.
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
                                 SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
  } else {
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
  }
  return Op;
}
5853
5854static bool isZeroVector(SDValue N) {
5855 if (N->getOpcode() == ISD::BITCAST)
5856 N = N->getOperand(0);
5857 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5858 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5859 return Op->getZExtValue() == 0;
5860 return ISD::isBuildVectorAllZeros(N.getNode());
5861}
5862
5863// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5864static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5865 for (unsigned I = 0; I < Num ; I++)
5866 if (isZeroVector(Ops[I]))
5867 return I;
5868 return UINT32_MAX;
5869}
5870
5871// Bytes is a VPERM-like permute vector, except that -1 is used for
5872// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5873// VSLDB or VPERM.
                                     SDValue *Ops,
                                     const SmallVectorImpl<int> &Bytes) {
  // Work on v16i8 copies of both operands.
  for (unsigned I = 0; I < 2; ++I)
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);

  // First see whether VSLDB can be used.
  unsigned StartIndex, OpNo0, OpNo1;
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
                       Ops[OpNo1],
                       DAG.getTargetConstant(StartIndex, DL, MVT::i32));

  // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
  // eliminate a zero vector by reusing any zero index in the permute vector.
  unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
  if (ZeroVecIdx != UINT32_MAX) {
    bool MaskFirst = true;
    int ZeroIdx = -1;
    // Find a permute-vector slot whose value is zero: either the first
    // result byte coming from the zero vector, or any selector of byte 0
    // of the non-zero operand.
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
      if (OpNo == ZeroVecIdx && I == 0) {
        // If the first byte is zero, use mask as first operand.
        ZeroIdx = 0;
        break;
      }
      if (OpNo != ZeroVecIdx && Byte == 0) {
        // If mask contains a zero, use it by placing that vector first.
        ZeroIdx = I + SystemZ::VectorBytes;
        MaskFirst = false;
        break;
      }
    }
    if (ZeroIdx != -1) {
      // Build the permute vector, selecting the known-zero mask byte
      // (ZeroIdx) wherever the result should be zero.
      SDValue IndexNodes[SystemZ::VectorBytes];
      for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
        if (Bytes[I] >= 0) {
          unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
          unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
          if (OpNo == ZeroVecIdx)
            IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
          else {
            unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
            IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
          }
        } else
          IndexNodes[I] = DAG.getUNDEF(MVT::i32);
      }
      SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
      SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
      // The mask itself doubles as the second source operand, so the
      // zero vector is no longer needed.
      if (MaskFirst)
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
                           Mask);
      else
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
                           Mask);
    }
  }

  // General case: build the full permute vector and emit a plain VPERM.
  SDValue IndexNodes[SystemZ::VectorBytes];
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0)
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
    else
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
                     (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
5944
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  GeneralShuffle(EVT vt)
      : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
  // Append one result element's worth of undefined bytes.
  void addUndef();
  // Append element Elem of the given operand to the shuffle; returns false
  // if the element cannot be represented.
  bool add(SDValue, unsigned);
  // Emit the DAG nodes for the accumulated shuffle.
  SDValue getNode(SelectionDAG &, const SDLoc &);
  // See whether a final zero-extending unpack can absorb a zero operand.
  void tryPrepareForUnpack();
  bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
  SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);

  // The operands of the shuffle.

  // Index I is -1 if byte I of the result is undefined. Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.

  // The type of the shuffle result.
  EVT VT;

  // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
  unsigned UnpackFromEltSize;
  // True if the final unpack uses the low half.
  bool UnpackLow;
};
} // namespace
5974
5975// Add an extra undefined element to the shuffle.
5976void GeneralShuffle::addUndef() {
5977 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5978 for (unsigned I = 0; I < BytesPerElement; ++I)
5979 Bytes.push_back(-1);
5980}
5981
5982// Add an extra element to the shuffle, taking it from element Elem of Op.
5983// A null Op indicates a vector input whose value will be calculated later;
5984// there is at most one such input per shuffle and it always has the same
5985// type as the result. Aborts and returns false if the source vector elements
5986// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5987// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  // Byte offset of the least significant BytesPerElement bytes of the
  // requested source element, within its 16-byte vector.
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      if (!getVPermMask(Op, OpBytes))
        break;
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        // The shuffle leaves these bytes undefined.
        addUndef();
        return true;
      }
      // Chase the element into the shuffle's source operand.
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);

  return true;
}
6046
6047// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Use a single unpack if possible as the last operation.
  tryPrepareForUnpack();

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges. It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewMask to avoid using VPERM.
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs. Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (unpackWasPrepared() && Ops[1].isUndef())
    Op = Ops[0];
  else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);

  // Add the deferred final unpack, if one was prepared.
  Op = insertUnpackIfPrepared(DAG, DL, Op);

  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
6133
6134#ifndef NDEBUG
6135static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6136 dbgs() << Msg.c_str() << " { ";
6137 for (unsigned I = 0; I < Bytes.size(); I++)
6138 dbgs() << Bytes[I] << " ";
6139 dbgs() << "}\n";
6140}
6141#endif
6142
6143// If the Bytes vector matches an unpack operation, prepare to do the unpack
6144// after all else by removing the zero vector and the effect of the unpack on
6145// Bytes.
void GeneralShuffle::tryPrepareForUnpack() {
  // Nothing to do unless one of the operands is a zero vector that an
  // unpack's implicit zero-extension could supply instead.
  uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
  if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
    return;

  // Only do this if removing the zero vector reduces the depth, otherwise
  // the critical path will increase with the final unpack.
  if (Ops.size() > 2 &&
      Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
    return;

  // Find an unpack that would allow removing the zero vector from Ops.
  UnpackFromEltSize = 1;
  for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
    bool MatchUnpack = true;
    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
      unsigned ToEltSize = UnpackFromEltSize * 2;
      // Within each widened element, the leading bytes must be the zeroes
      // an unpack would produce; the trailing bytes are the source data.
      bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
      if (!IsZextByte)
        SrcBytes.push_back(Bytes[Elt]);
      if (Bytes[Elt] != -1) {
        unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
        if (IsZextByte != (OpNo == ZeroVecOpNo)) {
          MatchUnpack = false;
          break;
        }
      }
    }
    if (MatchUnpack) {
      if (Ops.size() == 2) {
        // Don't use unpack if a single source operand needs rearrangement.
        bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
        for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
          if (SrcBytes[i] == -1)
            continue;
          if (SrcBytes[i] % 16 != int(i))
            CanUseUnpackHigh = false;
          if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
            CanUseUnpackLow = false;
          if (!CanUseUnpackLow && !CanUseUnpackHigh) {
            // Neither half works: give up on unpacking entirely.
            UnpackFromEltSize = UINT_MAX;
            return;
          }
        }
        if (!CanUseUnpackHigh)
          UnpackLow = true;
      }
      break;
    }
  }
  if (UnpackFromEltSize > 4)
    return;

  LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
             << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
             << ".\n";
             dumpBytes(Bytes, "Original Bytes vector:"););

  // Apply the unpack in reverse to the Bytes array.
  unsigned B = 0;
  if (UnpackLow) {
    while (B < SystemZ::VectorBytes / 2)
      Bytes[B++] = -1;
  }
  for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
    Elt += UnpackFromEltSize;
    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
      Bytes[B] = Bytes[Elt];
  }
  if (!UnpackLow) {
    while (B < SystemZ::VectorBytes)
      Bytes[B++] = -1;
  }

  // Remove the zero vector from Ops
  Ops.erase(&Ops[ZeroVecOpNo]);
  // Renumber the remaining operands now that one has been removed.
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      if (OpNo > ZeroVecOpNo)
        Bytes[I] -= SystemZ::VectorBytes;
    }

  LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
             dbgs() << "\n";);
}
6233
6234SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6235 const SDLoc &DL,
6236 SDValue Op) {
6237 if (!unpackWasPrepared())
6238 return Op;
6239 unsigned InBits = UnpackFromEltSize * 8;
6240 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6241 SystemZ::VectorBits / InBits);
6242 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6243 unsigned OutBits = InBits * 2;
6244 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6245 SystemZ::VectorBits / OutBits);
6246 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6247 : SystemZISD::UNPACKL_HIGH,
6248 DL, OutVT, PackedOp);
6249}
6250
6251// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
  // Element 0 may be anything; every other element must be undefined for
  // this BUILD_VECTOR to count as a scalar-to-vector conversion.
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}
6258
6259// Return a vector of type VT that contains Value in the first element.
6260// The other elements don't matter.
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    return DAG.getBuildVector(VT, DL, Ops);
  }
  // An undefined scalar yields an entirely undefined vector.
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  // Otherwise emit a plain SCALAR_TO_VECTOR node.
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}
6274
6275// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6276// element 1. Used for cases in which replication is cheap.
                                 SDValue Op0, SDValue Op1) {
  if (Op0.isUndef()) {
    if (Op1.isUndef())
      return DAG.getUNDEF(VT);
    // Only one scalar is defined: replicate it rather than merging with
    // an undef, since replication is cheap here.
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
  }
  if (Op1.isUndef())
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
  // Merge the two scalars into elements 0 and 1 of the result.
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                     buildScalarToVector(DAG, DL, VT, Op0),
                     buildScalarToVector(DAG, DL, VT, Op1));
}
6290
6291// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6292// vector for them.
                          SDValue Op1) {
  if (Op0.isUndef() && Op1.isUndef())
    return DAG.getUNDEF(MVT::v2i64);
  // If one of the two inputs is undefined then replicate the other one,
  // in order to avoid using another register unnecessarily.
  if (Op0.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  else if (Op1.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
  else {
    // Widen both scalars to doublewords before joining them.
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  }
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
6309
6310// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6311// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6312// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6313// would benefit from this representation and return it if so.
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    // Look through truncations to reach the wider source value.
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = Op.getConstantOperandVal(1);
      if (!GS.add(Op.getOperand(0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      // Defer this element to the residual BUILD_VECTOR; the null SDValue
      // acts as the placeholder operand.
      if (!GS.add(SDValue(), ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(BVN->getOperand(I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}
6362
6363bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6364 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6365 return true;
6366 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6367 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6368 return true;
6369 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6370 return true;
6371 return false;
6372}
6373
                              unsigned MergedBits, EVT VT, SDValue Op0,
                              SDValue Op1) {
  // Perform the merge on integer vectors whose elements are MergedBits wide.
  MVT IntVecVT = MVT::getVectorVT(MVT::getIntegerVT(MergedBits),
                                  SystemZ::VectorBits / MergedBits);
  assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
         "Handling full vectors only.");
  Op0 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op1);
  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, IntVecVT, Op0, Op1);
  // Cast the merged value back to the requested result type.
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
6386
                                      EVT VT, SmallVectorImpl<SDValue> &Elems,
                                      unsigned Pos) {
  // Merge adjacent pairs of scalars starting at Elems[Pos].
  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 0], Elems[Pos + 1]);
  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 2], Elems[Pos + 3]);
  // Avoid unnecessary undefs by reusing the other operand.
  if (Op01.isUndef()) {
    if (Op23.isUndef())
      return Op01;
    Op01 = Op23;
  } else if (Op23.isUndef())
    Op23 = Op01;
  // Merging identical replications is a no-op.
  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
    return Op01;
  // Combine the two halves with a double-width merge high.
  unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
  return mergeHighParts(DAG, DL, MergedBits, VT, Op01, Op23);
}
6405
6406// Combine GPR scalar values Elems into a vector of type VT.
// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
  //        V              V        VMRHF
  //     <ABxx>         <CDxx>
  //          V      V              VMRHG
  //           <ABCD>
  if (VT == MVT::v4f32 && !AllLoads)
    return buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);

  // Same for v8f16.
  if (VT == MVT::v8f16 && !AllLoads) {
    SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
    SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 4);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op0123.isUndef())
      Op0123 = Op4567;
    else if (Op4567.isUndef())
      Op4567 = Op0123;
    // Merging identical replications is a no-op.
    if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
      return Op0123;
    return mergeHighParts(DAG, DL, 64, VT, Op0123, Op4567);
  }

  // Collect the constant terms.

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        unsigned Count = ++UseCounts[Ld];
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        // VLVGP sets elements I1 and I2 in one instruction; substitute the
        // other defined element for an undefined one where possible.
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}
6550
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    // Constants that one of the VGBM/VREPI/... forms can materialize are
    // left for instruction selection.
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
      return Op;

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}
6580
SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Index, DL, MVT::i32));
  }

  // General case: feed every mask element into a GeneralShuffle and let it
  // choose the cheapest instruction sequence.
  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}
6613
6614SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6615 SelectionDAG &DAG) const {
6616 SDLoc DL(Op);
6617 // Just insert the scalar into element 0 of an undefined vector.
6618 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6619 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6620 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6621}
6622
// Shift the lower 2 bytes of Op to the left in order to insert into the
// upper 2 bytes of the FP register.
  assert(Op.getSimpleValueType() == MVT::i64 &&
         "Expexted to convert i64 to f16.");
  SDLoc DL(Op);
  // Move the 16 payload bits from the low end of the i64 to the high end
  // so that, after the bitcast to f64, they occupy the high halfword.
  SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
                             DAG.getConstant(48, DL, MVT::i64));
  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
  // Extract the high 16-bit subregister as the f16 result.
  SDValue F16Val =
      DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
  return F16Val;
}
6636
// Extract Op into GPR and shift the 2 f16 bytes to the right.
  assert(Op.getSimpleValueType() == MVT::f16 &&
         "Expected to convert f16 to i64.");
  // Insert the f16 value into the high halfword of an (otherwise
  // undefined) f64 register, then bitcast to i64 and shift the payload
  // down into the low 16 bits.
  SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
                                           SDValue(U32, 0), Op);
  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
  SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
                             DAG.getConstant(48, DL, MVT::i32));
  return Shft;
}
6649
6650SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6651 SelectionDAG &DAG) const {
6652 // Handle insertions of floating-point values.
6653 SDLoc DL(Op);
6654 SDValue Op0 = Op.getOperand(0);
6655 SDValue Op1 = Op.getOperand(1);
6656 SDValue Op2 = Op.getOperand(2);
6657 EVT VT = Op.getValueType();
6658
6659 // Insertions into constant indices of a v2f64 can be done using VPDI.
6660 // However, if the inserted value is a bitcast or a constant then it's
6661 // better to use GPRs, as below.
6662 if (VT == MVT::v2f64 &&
6663 Op1.getOpcode() != ISD::BITCAST &&
6664 Op1.getOpcode() != ISD::ConstantFP &&
6665 Op2.getOpcode() == ISD::Constant) {
6666 uint64_t Index = Op2->getAsZExtVal();
6667 unsigned Mask = VT.getVectorNumElements() - 1;
6668 if (Index <= Mask)
6669 return Op;
6670 }
6671
6672 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6673 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6674 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6675 SDValue IntOp1 =
6676 VT == MVT::v8f16
6677 ? DAG.getZExtOrTrunc(convertFromF16(Op1, DL, DAG), DL, MVT::i32)
6678 : DAG.getNode(ISD::BITCAST, DL, IntVT, Op1);
6679 SDValue Res =
6680 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6681 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), IntOp1, Op2);
6682 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6683}
6684
6685SDValue
6686SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6687 SelectionDAG &DAG) const {
6688 // Handle extractions of floating-point values.
6689 SDLoc DL(Op);
6690 SDValue Op0 = Op.getOperand(0);
6691 SDValue Op1 = Op.getOperand(1);
6692 EVT VT = Op.getValueType();
6693 EVT VecVT = Op0.getValueType();
6694
6695 // Extractions of constant indices can be done directly.
6696 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6697 uint64_t Index = CIndexN->getZExtValue();
6698 unsigned Mask = VecVT.getVectorNumElements() - 1;
6699 if (Index <= Mask)
6700 return Op;
6701 }
6702
6703 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6704 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6705 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6706 MVT ExtrVT = IntVT == MVT::i16 ? MVT::i32 : IntVT;
6707 SDValue Extr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrVT,
6708 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6709 if (VT == MVT::f16)
6710 return convertToF16(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Extr), DAG);
6711 return DAG.getNode(ISD::BITCAST, DL, VT, Extr);
6712}
6713
SDValue SystemZTargetLowering::
lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  // Lane offset into PackedOp at which the extension starts; adjusted
  // below when a feeding shuffle can be folded away.
  unsigned StartOffset = 0;

  // If the input is a VECTOR_SHUFFLE, there are a number of important
  // cases where we can directly implement the sign-extension of the
  // original input lanes of the shuffle.
  if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    int OutNumElts = OutVT.getVectorNumElements();

    // Recognize the special case where the sign-extension can be done
    // by the VSEG instruction. Handled via the default expander.
    if (ToBits == 64 && OutNumElts == 2) {
      int NumElem = ToBits / FromBits;
      if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
        return SDValue();
    }

    // Recognize the special case where we can fold the shuffle by
    // replacing some of the UNPACK_HIGH with UNPACK_LOW.
    // The fold applies only when every defined mask element selects a
    // lane at one common constant offset from its own position.
    int StartOffsetCandidate = -1;
    for (int Elt = 0; Elt < OutNumElts; Elt++) {
      if (ShuffleMask[Elt] == -1)
        continue;
      if (ShuffleMask[Elt] % OutNumElts == Elt) {
        if (StartOffsetCandidate == -1)
          StartOffsetCandidate = ShuffleMask[Elt] - Elt;
        if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
          continue;
      }
      StartOffsetCandidate = -1;
      break;
    }
    if (StartOffsetCandidate != -1) {
      StartOffset = StartOffsetCandidate;
      PackedOp = PackedOp.getOperand(0);
    }
  }

  // Repeatedly unpack, doubling the element width each iteration, until
  // the requested output width is reached.  At each step StartOffset
  // decides whether the high or the low half of the vector is extended.
  do {
    FromBits *= 2;
    unsigned OutNumElts = SystemZ::VectorBits / FromBits;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
    unsigned Opcode = SystemZISD::UNPACK_HIGH;
    if (StartOffset >= OutNumElts) {
      Opcode = SystemZISD::UNPACK_LOW;
      StartOffset -= OutNumElts;
    }
    PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}
6773
6774// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6775SDValue SystemZTargetLowering::
6776lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6777 SDValue PackedOp = Op.getOperand(0);
6778 SDLoc DL(Op);
6779 EVT OutVT = Op.getValueType();
6780 EVT InVT = PackedOp.getValueType();
6781 unsigned InNumElts = InVT.getVectorNumElements();
6782 unsigned OutNumElts = OutVT.getVectorNumElements();
6783 unsigned NumInPerOut = InNumElts / OutNumElts;
6784
6785 SDValue ZeroVec =
6786 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6787
6788 SmallVector<int, 16> Mask(InNumElts);
6789 unsigned ZeroVecElt = InNumElts;
6790 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6791 unsigned MaskElt = PackedElt * NumInPerOut;
6792 unsigned End = MaskElt + NumInPerOut - 1;
6793 for (; MaskElt < End; MaskElt++)
6794 Mask[MaskElt] = ZeroVecElt++;
6795 Mask[MaskElt] = PackedElt;
6796 }
6797 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6798 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6799}
6800
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats. Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      // Keep only the low 12 bits of the splat value as the shift amount.
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}
6857
SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // i128 FSHL with a constant amount that is a multiple of 8 can be
  // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
  // facility, FSHL with a constant amount less than 8 can be implemented
  // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
  // combination of the two.
  if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
    // Funnel-shift amounts are taken modulo the bit width (128).
    uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
    if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
      SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
      SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
      if (ShiftAmt > 120) {
        // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
        // SHR_DOUBLE_BIT emits fewer instructions.
        SDValue Val =
            DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
                        DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
        return DAG.getBitcast(MVT::i128, Val);
      }
      // Handle the whole-byte part of the shift with a shuffle selecting
      // the 16 bytes starting at byte offset ShiftAmt / 8.
      SmallVector<int, 16> Mask(16);
      for (unsigned Elt = 0; Elt < 16; Elt++)
        Mask[Elt] = (ShiftAmt >> 3) + Elt;
      SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
      if ((ShiftAmt & 7) == 0)
        return DAG.getBitcast(MVT::i128, Shuf1);
      // The remaining sub-byte amount is handled by SHL_DOUBLE_BIT.
      SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
      SDValue Val =
          DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
                      DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
      return DAG.getBitcast(MVT::i128, Val);
    }
  }

  // Non-constant or unsupported amounts: use the default expansion.
  return SDValue();
}
6895
SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // i128 FSHR with a constant amount that is a multiple of 8 can be
  // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
  // facility, FSHR with a constant amount less than 8 can be implemented
  // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
  // combination of the two.
  if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
    // Funnel-shift amounts are taken modulo the bit width (128).
    uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
    if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
      SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
      SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
      if (ShiftAmt > 120) {
        // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
        // SHL_DOUBLE_BIT emits fewer instructions.
        SDValue Val =
            DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
                        DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
        return DAG.getBitcast(MVT::i128, Val);
      }
      // Handle the whole-byte part of the shift with a shuffle selecting
      // the 16 bytes ending ShiftAmt / 8 bytes before the end.
      SmallVector<int, 16> Mask(16);
      for (unsigned Elt = 0; Elt < 16; Elt++)
        Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
      SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
      if ((ShiftAmt & 7) == 0)
        return DAG.getBitcast(MVT::i128, Shuf1);
      // The remaining sub-byte amount is handled by SHR_DOUBLE_BIT.
      SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
      SDValue Val =
          DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
                      DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
      return DAG.getBitcast(MVT::i128, Val);
    }
  }

  // Non-constant or unsupported amounts: use the default expansion.
  return SDValue();
}
6933
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  MVT DstVT = Op.getSimpleValueType();

  unsigned SrcAS = N->getSrcAddressSpace();

  assert(SrcAS != N->getDestAddressSpace() &&
         "addrspacecast must be between different address spaces");

  // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
  // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
  if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
    // Clear the top bit of the ptr32 value, then zero-extend to 64 bits.
    Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
                     DAG.getConstant(0x7fffffff, DL, MVT::i32));
    Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
  } else if (DstVT == MVT::i32) {
    // Truncate the 64-bit pointer to 32 bits and clear the top bit.
    Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
    Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
                     DAG.getConstant(0x7fffffff, DL, MVT::i32));
    Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
  } else {
    report_fatal_error("Bad address space in addrspacecast");
  }
  return Op;
}
6961
6962SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6963 SelectionDAG &DAG) const {
6964 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6965 if (In.getSimpleValueType() != MVT::f16)
6966 return Op; // Legal
6967 return SDValue(); // Let legalizer emit the libcall.
6968}
6969
                                         MVT VT, SDValue Arg, SDLoc DL,
                                         SDValue Chain, bool IsStrict) const {
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
  MakeLibCallOptions CallOptions;
  SDValue Result;
  // Emit the runtime-library call; makeLibCall returns both the call
  // result and the updated chain.
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
  // Strict FP nodes must return the chain alongside the value.
  return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
}
6980
SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
                                               SelectionDAG &DAG) const {
  bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
                   Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
  bool IsStrict = Op->isStrictFPOpcode();
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
  EVT InVT = InOp.getValueType();

  // FP to unsigned is not directly supported on z10. Promoting an i32
  // result to (signed) i64 doesn't generate an inexact condition (fp
  // exception) for values that are outside the i32 range but in the i64
  // range, so use the default expansion.
  if (!Subtarget.hasFPExtension() && !IsSigned)
    // Expand i32/i64. F16 values will be recognized to fit and extended.
    return SDValue();

  // Conversion from f16 is done via f32.
  if (InOp.getSimpleValueType() == MVT::f16) {
    LowerOperationWrapper(Op.getNode(), Results, DAG);
    return DAG.getMergeValues(Results, DL);
  }

  // i128 results are handled by a runtime-library call.
  if (VT == MVT::i128) {
    RTLIB::Libcall LC =
        IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
    return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
  }

  return Op; // Legal
}
7015
SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
                                               SelectionDAG &DAG) const {
  bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
                   Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
  bool IsStrict = Op->isStrictFPOpcode();
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
  EVT InVT = InOp.getValueType();

  // Conversion to f16 is done via f32.
  if (VT == MVT::f16) {
    LowerOperationWrapper(Op.getNode(), Results, DAG);
    return DAG.getMergeValues(Results, DL);
  }

  // Unsigned to fp is not directly supported on z10.
  if (!Subtarget.hasFPExtension() && !IsSigned)
    return SDValue(); // Expand i64.

  // i128 source operands are handled by a runtime-library call.
  if (InVT == MVT::i128) {
    RTLIB::Libcall LC =
        IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
    return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
  }

  return Op; // Legal
}
7046
7047// Lower an f16 LOAD in case of no vector support.
7048SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
7049 SelectionDAG &DAG) const {
7050 EVT RegVT = Op.getValueType();
7051 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
7052 (void)RegVT;
7053
7054 // Load as integer.
7055 SDLoc DL(Op);
7056 SDValue NewLd;
7057 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
7058 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7059 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
7060 AtomicLd->getChain(), AtomicLd->getBasePtr(),
7061 AtomicLd->getMemOperand());
7062 } else {
7063 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
7064 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7065 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7066 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7067 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7068 }
7069 SDValue F16Val = convertToF16(NewLd, DAG);
7070 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7071}
7072
7073// Lower an f16 STORE in case of no vector support.
7074SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7075 SelectionDAG &DAG) const {
7076 SDLoc DL(Op);
7077 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7078
7079 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7080 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7081 Shft, AtomicSt->getBasePtr(),
7082 AtomicSt->getMemOperand());
7083
7084 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7085 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7086 St->getMemOperand());
7087}
7088
SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT ResultVT = Op.getSimpleValueType();
  SDValue Arg = Op.getOperand(0);
  // Bitmask of fcXXX classes requested by the caller.
  unsigned Check = Op.getConstantOperandVal(1);

  // Translate the requested fcXXX class bits into the corresponding
  // Test Data Class (TDC) mask bits.
  unsigned TDCMask = 0;
  if (Check & fcSNan)
  if (Check & fcQNan)
  if (Check & fcPosInf)
  if (Check & fcNegInf)
  if (Check & fcPosNormal)
  if (Check & fcNegNormal)
  if (Check & fcPosSubnormal)
  if (Check & fcNegSubnormal)
  if (Check & fcPosZero)
    TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
  if (Check & fcNegZero)
    TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);

  // Widen f16 arguments to f32 before emitting the TDC node.
  if (Arg.getSimpleValueType() == MVT::f16)
    Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
  SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
  return getCCResult(DAG, Intr);
}
7124
SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);

  // STCKF only supports a memory operand, so we have to use a temporary.
  SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  MachinePointerInfo MPI =

  // Use STCKF to store the TOD clock into the temporary.
  SDValue StoreOps[] = {Chain, StackPtr};
  Chain = DAG.getMemIntrinsicNode(
      SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
      MPI, MaybeAlign(), MachineMemOperand::MOStore);

  // And read it back from there.
  return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
}
7145
                                              SelectionDAG &DAG) const {
  // Dispatch each custom-lowered opcode to its dedicated helper.
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::STRICT_FSETCC:
    return lowerSTRICT_FSETCC(Op, DAG, false);
    return lowerSTRICT_FSETCC(Op, DAG, true);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::MULHS:
    return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
  case ISD::MULHU:
    return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return lowerUADDSUBO_CARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
    return lowerATOMIC_LOAD_SUB(Op, DAG);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
    return lowerVECTOR_SHUFFLE(Op, DAG);
    return lowerSCALAR_TO_VECTOR(Op, DAG);
    return lowerINSERT_VECTOR_ELT(Op, DAG);
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  case ISD::ADDRSPACECAST:
    return lowerAddrSpaceCast(Op, DAG);
  case ISD::ROTL:
    return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
  case ISD::FSHL:
    return lowerFSHL(Op, DAG);
  case ISD::FSHR:
    return lowerFSHR(Op, DAG);
  case ISD::FP_EXTEND:
    return lowerFP_EXTEND(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
    return lower_FP_TO_INT(Op, DAG);
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    return lower_INT_TO_FP(Op, DAG);
  case ISD::LOAD:
    return lowerLoadF16(Op, DAG);
  case ISD::STORE:
    return lowerStoreF16(Op, DAG);
  case ISD::IS_FPCLASS:
    return lowerIS_FPCLASS(Op, DAG);
  case ISD::GET_ROUNDING:
    return lowerGET_ROUNDING(Op, DAG);
    return lowerREADCYCLECOUNTER(Op, DAG);
    // These operations are legal on our platform, but we cannot actually
    // set the operation action to Legal as common code would treat this
    // as equivalent to Expand. Instead, we keep the operation action to
    // Custom and just leave them unchanged here.
    return Op;

  default:
    llvm_unreachable("Unexpected node to lower");
  }
}
7312
                                         const SDLoc &SL) {
  // If i128 is legal, just use a normal bitcast.
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
    return DAG.getBitcast(MVT::f128, Src);

  // Otherwise, f128 must live in FP128, so do a partwise move.
         &SystemZ::FP128BitRegClass);

  // Split the i128 into two i64 halves and reinterpret each as f64.
  SDValue Hi, Lo;
  std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);

  Hi = DAG.getBitcast(MVT::f64, Hi);
  Lo = DAG.getBitcast(MVT::f64, Lo);

  // Assemble the two f64 halves into an FP128 register pair via
  // REG_SEQUENCE (Lo -> subreg_l64, Hi -> subreg_h64).
  SDNode *Pair = DAG.getMachineNode(
      SystemZ::REG_SEQUENCE, SL, MVT::f128,
      {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
       DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
       DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
  return SDValue(Pair, 0);
}
7336
                                         const SDLoc &SL) {
  // If i128 is legal, just use a normal bitcast.
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
    return DAG.getBitcast(MVT::i128, Src);

  // Otherwise, f128 must live in FP128, so do a partwise move.
         &SystemZ::FP128BitRegClass);

  // Pull out the two f64 subregisters and reinterpret each as i64.
  SDValue LoFP =
      DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
  SDValue HiFP =
      DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
  SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
  SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);

  // Recombine the halves into a single i128 value.
  return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
}
7356
// Lower operations with invalid operand or result types.
void
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    // Perform the 128-bit atomic load via the target node that returns
    // an untyped register-pair result.
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);

    SDValue Lowered = lowerGR128ToI128(DAG, Res);
    if (N->getValueType(0) == MVT::f128)
      Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
    Results.push_back(Lowered);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Val = N->getOperand(1);
    // f128 values are first reinterpreted as i128 before the GR128 move.
    if (Val.getValueType() == MVT::f128)
      Val = expandBitCastF128ToI128(DAG, Val, DL);
    Val = lowerI128ToGR128(DAG, Val);

    SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // Derive the boolean success result from the CC output.
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  case ISD::BITCAST: {
    if (useSoftFloat())
      return;
    SDLoc DL(N);
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT ResVT = N->getValueType(0);
    if (ResVT == MVT::i128 && SrcVT == MVT::f128)
      Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
    else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
      // i16 -> f16: move via a GPR, using a vector move when available.
      if (Subtarget.hasVector()) {
        SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
        Results.push_back(SDValue(
            DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
      } else {
        SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
        Results.push_back(convertToF16(In64, DAG));
      }
    } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
      // f16 -> i16: extract via a GPR, using a vector move when available.
      SDValue ExtractedVal =
          Subtarget.hasVector()
              ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
                        0)
              : convertFromF16(Src, DL, DAG);
      Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
    }
    break;
  }
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    if (useSoftFloat())
      return;
    bool IsStrict = N->isStrictFPOpcode();
    SDLoc DL(N);
    SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
    EVT ResVT = N->getValueType(0);
    SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
    if (ResVT == MVT::f16) {
      if (!IsStrict) {
        // Convert to f32 first, then round the result to f16.
        SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
        Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
      } else {
        SDValue OpF32 =
            DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
                        {Chain, InOp});
        SDValue F16Res;
        std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
            OpF32, OpF32.getValue(1), DL, MVT::f16);
        Results.push_back(F16Res);
        Results.push_back(Chain);
      }
    }
    break;
  }
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
    if (useSoftFloat())
      return;
    bool IsStrict = N->isStrictFPOpcode();
    SDLoc DL(N);
    EVT ResVT = N->getValueType(0);
    SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
    EVT InVT = InOp->getValueType(0);
    SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
    if (InVT == MVT::f16) {
      if (!IsStrict) {
        // Extend the f16 input to f32 and convert from there.
        SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
        Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
      } else {
        SDValue InF32;
        std::tie(InF32, Chain) =
            DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
        SDValue OpF32 =
            DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
                        {Chain, InF32});
        Results.push_back(OpF32);
        Results.push_back(OpF32.getValue(1));
      }
    }
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}
7505
7506void
7512
7513// Return true if VT is a vector whose elements are a whole number of bytes
7514// in width. Also check for presence of vector support.
7515bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7516 if (!Subtarget.hasVector())
7517 return false;
7518
7519 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7520}
7521
7522// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7523// producing a result of type ResVT. Op is a possibly bitcast version
7524// of the input vector and Index is the index (based on type VecVT) that
7525// should be extracted. Return the new extraction if a simplification
7526// was possible or if Force is true.
7527SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7528 EVT VecVT, SDValue Op,
7529 unsigned Index,
7530 DAGCombinerInfo &DCI,
7531 bool Force) const {
7532 SelectionDAG &DAG = DCI.DAG;
7533
7534 // The number of bytes being extracted.
7535 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7536
7537 for (;;) {
7538 unsigned Opcode = Op.getOpcode();
7539 if (Opcode == ISD::BITCAST)
7540 // Look through bitcasts.
7541 Op = Op.getOperand(0);
7542 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7543 canTreatAsByteVector(Op.getValueType())) {
7544 // Get a VPERM-like permute mask and see whether the bytes covered
7545 // by the extracted element are a contiguous sequence from one
7546 // source operand.
7548 if (!getVPermMask(Op, Bytes))
7549 break;
7550 int First;
7551 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7552 BytesPerElement, First))
7553 break;
7554 if (First < 0)
7555 return DAG.getUNDEF(ResVT);
7556 // Make sure the contiguous sequence starts at a multiple of the
7557 // original element size.
7558 unsigned Byte = unsigned(First) % Bytes.size();
7559 if (Byte % BytesPerElement != 0)
7560 break;
7561 // We can get the extracted value directly from an input.
7562 Index = Byte / BytesPerElement;
7563 Op = Op.getOperand(unsigned(First) / Bytes.size());
7564 Force = true;
7565 } else if (Opcode == ISD::BUILD_VECTOR &&
7566 canTreatAsByteVector(Op.getValueType())) {
7567 // We can only optimize this case if the BUILD_VECTOR elements are
7568 // at least as wide as the extracted value.
7569 EVT OpVT = Op.getValueType();
7570 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7571 if (OpBytesPerElement < BytesPerElement)
7572 break;
7573 // Make sure that the least-significant bit of the extracted value
7574 // is the least significant bit of an input.
7575 unsigned End = (Index + 1) * BytesPerElement;
7576 if (End % OpBytesPerElement != 0)
7577 break;
7578 // We're extracting the low part of one operand of the BUILD_VECTOR.
7579 Op = Op.getOperand(End / OpBytesPerElement - 1);
7580 if (!Op.getValueType().isInteger()) {
7581 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7582 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7583 DCI.AddToWorklist(Op.getNode());
7584 }
7585 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7586 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7587 if (VT != ResVT) {
7588 DCI.AddToWorklist(Op.getNode());
7589 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7590 }
7591 return Op;
7592 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7594 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7595 canTreatAsByteVector(Op.getValueType()) &&
7596 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7597 // Make sure that only the unextended bits are significant.
7598 EVT ExtVT = Op.getValueType();
7599 EVT OpVT = Op.getOperand(0).getValueType();
7600 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7601 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7602 unsigned Byte = Index * BytesPerElement;
7603 unsigned SubByte = Byte % ExtBytesPerElement;
7604 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7605 if (SubByte < MinSubByte ||
7606 SubByte + BytesPerElement > ExtBytesPerElement)
7607 break;
7608 // Get the byte offset of the unextended element
7609 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7610 // ...then add the byte offset relative to that element.
7611 Byte += SubByte - MinSubByte;
7612 if (Byte % BytesPerElement != 0)
7613 break;
7614 Op = Op.getOperand(0);
7615 Index = Byte / BytesPerElement;
7616 Force = true;
7617 } else
7618 break;
7619 }
7620 if (Force) {
7621 if (Op.getValueType() != VecVT) {
7622 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7623 DCI.AddToWorklist(Op.getNode());
7624 }
7625 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7626 DAG.getConstant(Index, DL, MVT::i32));
7627 }
7628 return SDValue();
7629}
7630
7631// Optimize vector operations in scalar value Op on the basis that Op
7632// is truncated to TruncVT.
7633SDValue SystemZTargetLowering::combineTruncateExtract(
7634 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7635 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7636 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7637 // of type TruncVT.
7638 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7639 TruncVT.getSizeInBits() % 8 == 0) {
7640 SDValue Vec = Op.getOperand(0);
7641 EVT VecVT = Vec.getValueType();
7642 if (canTreatAsByteVector(VecVT)) {
7643 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7644 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7645 unsigned TruncBytes = TruncVT.getStoreSize();
7646 if (BytesPerElement % TruncBytes == 0) {
7647 // Calculate the value of Y' in the above description. We are
7648 // splitting the original elements into Scale equal-sized pieces
7649 // and for truncation purposes want the last (least-significant)
7650 // of these pieces for IndexN. This is easiest to do by calculating
7651 // the start index of the following element and then subtracting 1.
7652 unsigned Scale = BytesPerElement / TruncBytes;
7653 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7654
7655 // Defer the creation of the bitcast from X to combineExtract,
7656 // which might be able to optimize the extraction.
7657 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7658 MVT::getIntegerVT(TruncBytes * 8),
7659 VecVT.getStoreSize() / TruncBytes);
7660 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7661 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7662 }
7663 }
7664 }
7665 }
7666 return SDValue();
7667}
7668
SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Only handle the case where both select arms are constants; the
    // zero-extension can then be folded into the constants themselves.
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      // Rebuild the select in the wider type, zero-extending both
      // constants and keeping the remaining three operands unchanged.
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        // Other uses still expect the narrow type, so hand them a
        // truncate of the new wide select.
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
  // of the result is smaller than the size of X and all the truncated bits
  // of X are already zero.
  if (N0.getOpcode() == ISD::XOR &&
      N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
      KnownBits Known = DAG.computeKnownBits(X);
      // The bits between the XOR's width and the zext's width must be
      // known zero in X for the fold to be valid.
      APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
                                              N0.getValueSizeInBits(),
                                              VT.getSizeInBits());
      if (TruncatedBits.isSubsetOf(Known.Zero)) {
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        // Zero-extend the XOR constant to the result width.
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
                           X, DAG.getConstant(Mask, SDLoc(N0), VT));
      }
    }
  }
  // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
  // and VECTOR ADD COMPUTE CARRY for i128:
  // (zext (setcc_uge X Y)) --> (VSCBI X Y)
  // (zext (setcc_ule Y X)) --> (VSCBI X Y)
  // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
  // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
  // For vector types, these patterns are recognized in the .td file.
  if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
      N0.getOperand(0).getValueType() == VT) {
    SDValue Op0 = N0.getOperand(0);
    SDValue Op1 = N0.getOperand(1);
    const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    switch (CC) {
    case ISD::SETULE:
      // Canonicalize ULE to the UGE form by swapping the operands.
      std::swap(Op0, Op1);
      [[fallthrough]];
    case ISD::SETUGE:
      return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
    case ISD::SETUGT:
      // Canonicalize UGT to the ULT form by swapping the operands.
      std::swap(Op0, Op1);
      [[fallthrough]];
    case ISD::SETULT:
      // (setcc_ult (add X Y), X/Y) tests for unsigned carry out of X + Y.
      if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
          (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
        return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
                           Op0->getOperand(1));
      break;
    default:
      break;
    }
  }

  return SDValue();
}
7748
7749SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7750 SDNode *N, DAGCombinerInfo &DCI) const {
7751 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7752 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7753 // into (select_cc LHS, RHS, -1, 0, COND)
7754 SelectionDAG &DAG = DCI.DAG;
7755 SDValue N0 = N->getOperand(0);
7756 EVT VT = N->getValueType(0);
7757 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7758 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7759 N0 = N0.getOperand(0);
7760 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7761 SDLoc DL(N0);
7762 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7763 DAG.getAllOnesConstant(DL, VT),
7764 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7765 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7766 }
7767 return SDValue();
7768}
7769
7770SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7771 SDNode *N, DAGCombinerInfo &DCI) const {
7772 // Convert (sext (ashr (shl X, C1), C2)) to
7773 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7774 // cheap as narrower ones.
7775 SelectionDAG &DAG = DCI.DAG;
7776 SDValue N0 = N->getOperand(0);
7777 EVT VT = N->getValueType(0);
7778 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7779 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7780 SDValue Inner = N0.getOperand(0);
7781 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7782 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7783 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7784 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7785 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7786 EVT ShiftVT = N0.getOperand(1).getValueType();
7787 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7788 Inner.getOperand(0));
7789 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7790 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7791 ShiftVT));
7792 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7793 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7794 }
7795 }
7796 }
7797
7798 return SDValue();
7799}
7800
7801SDValue SystemZTargetLowering::combineMERGE(
7802 SDNode *N, DAGCombinerInfo &DCI) const {
7803 SelectionDAG &DAG = DCI.DAG;
7804 unsigned Opcode = N->getOpcode();
7805 SDValue Op0 = N->getOperand(0);
7806 SDValue Op1 = N->getOperand(1);
7807 if (Op0.getOpcode() == ISD::BITCAST)
7808 Op0 = Op0.getOperand(0);
7810 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7811 // for v4f32.
7812 if (Op1 == N->getOperand(0))
7813 return Op1;
7814 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7815 EVT VT = Op1.getValueType();
7816 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7817 if (ElemBytes <= 4) {
7818 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7819 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7820 EVT InVT = VT.changeVectorElementTypeToInteger();
7821 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7822 SystemZ::VectorBytes / ElemBytes / 2);
7823 if (VT != InVT) {
7824 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7825 DCI.AddToWorklist(Op1.getNode());
7826 }
7827 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7828 DCI.AddToWorklist(Op.getNode());
7829 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7830 }
7831 }
7832 return SDValue();
7833}
7834
7835static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7836 SDNode *&HiPart) {
7837 LoPart = HiPart = nullptr;
7838
7839 // Scan through all users.
7840 for (SDUse &Use : LD->uses()) {
7841 // Skip the uses of the chain.
7842 if (Use.getResNo() != 0)
7843 continue;
7844
7845 // Verify every user is a TRUNCATE to i64 of the low or high half.
7846 SDNode *User = Use.getUser();
7847 bool IsLoPart = true;
7848 if (User->getOpcode() == ISD::SRL &&
7849 User->getOperand(1).getOpcode() == ISD::Constant &&
7850 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7851 User = *User->user_begin();
7852 IsLoPart = false;
7853 }
7854 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7855 return false;
7856
7857 if (IsLoPart) {
7858 if (LoPart)
7859 return false;
7860 LoPart = User;
7861 } else {
7862 if (HiPart)
7863 return false;
7864 HiPart = User;
7865 }
7866 }
7867 return true;
7868}
7869
7870static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7871 SDNode *&HiPart) {
7872 LoPart = HiPart = nullptr;
7873
7874 // Scan through all users.
7875 for (SDUse &Use : LD->uses()) {
7876 // Skip the uses of the chain.
7877 if (Use.getResNo() != 0)
7878 continue;
7879
7880 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7881 SDNode *User = Use.getUser();
7882 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7883 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7884 return false;
7885
7886 switch (User->getConstantOperandVal(1)) {
7887 case SystemZ::subreg_l64:
7888 if (LoPart)
7889 return false;
7890 LoPart = User;
7891 break;
7892 case SystemZ::subreg_h64:
7893 if (HiPart)
7894 return false;
7895 HiPart = User;
7896 break;
7897 default:
7898 return false;
7899 }
7900 }
7901 return true;
7902}
7903
7904SDValue SystemZTargetLowering::combineLOAD(
7905 SDNode *N, DAGCombinerInfo &DCI) const {
7906 SelectionDAG &DAG = DCI.DAG;
7907 EVT LdVT = N->getValueType(0);
7908 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7909 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7910 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7911 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7912 if (PtrVT != LoadNodeVT) {
7913 SDLoc DL(LN);
7914 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7915 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7916 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7917 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7918 LN->getMemOperand());
7919 }
7920 }
7921 }
7922 SDLoc DL(N);
7923
7924 // Replace a 128-bit load that is used solely to move its value into GPRs
7925 // by separate loads of both halves.
7926 LoadSDNode *LD = cast<LoadSDNode>(N);
7927 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7928 SDNode *LoPart, *HiPart;
7929 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7930 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7931 // Rewrite each extraction as an independent load.
7932 SmallVector<SDValue, 2> ArgChains;
7933 if (HiPart) {
7934 SDValue EltLoad = DAG.getLoad(
7935 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7936 LD->getPointerInfo(), LD->getBaseAlign(),
7937 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7938
7939 DCI.CombineTo(HiPart, EltLoad, true);
7940 ArgChains.push_back(EltLoad.getValue(1));
7941 }
7942 if (LoPart) {
7943 SDValue EltLoad = DAG.getLoad(
7944 LoPart->getValueType(0), DL, LD->getChain(),
7945 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7946 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7947 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7948
7949 DCI.CombineTo(LoPart, EltLoad, true);
7950 ArgChains.push_back(EltLoad.getValue(1));
7951 }
7952
7953 // Collect all chains via TokenFactor.
7954 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7955 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7956 DCI.AddToWorklist(Chain.getNode());
7957 return SDValue(N, 0);
7958 }
7959 }
7960
7961 if (LdVT.isVector() || LdVT.isInteger())
7962 return SDValue();
7963 // Transform a scalar load that is REPLICATEd as well as having other
7964 // use(s) to the form where the other use(s) use the first element of the
7965 // REPLICATE instead of the load. Otherwise instruction selection will not
7966 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7967 // point loads.
7968
7969 SDValue Replicate;
7970 SmallVector<SDNode*, 8> OtherUses;
7971 for (SDUse &Use : N->uses()) {
7972 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7973 if (Replicate)
7974 return SDValue(); // Should never happen
7975 Replicate = SDValue(Use.getUser(), 0);
7976 } else if (Use.getResNo() == 0)
7977 OtherUses.push_back(Use.getUser());
7978 }
7979 if (!Replicate || OtherUses.empty())
7980 return SDValue();
7981
7982 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7983 Replicate, DAG.getConstant(0, DL, MVT::i32));
7984 // Update uses of the loaded Value while preserving old chains.
7985 for (SDNode *U : OtherUses) {
7987 for (SDValue Op : U->ops())
7988 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7989 DAG.UpdateNodeOperands(U, Ops);
7990 }
7991 return SDValue(N, 0);
7992}
7993
7994bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7995 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7996 return true;
7997 if (Subtarget.hasVectorEnhancements2())
7998 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7999 return true;
8000 return false;
8001}
8002
8004 if (!VT.isVector() || !VT.isSimple() ||
8005 VT.getSizeInBits() != 128 ||
8006 VT.getScalarSizeInBits() % 8 != 0)
8007 return false;
8008
8009 unsigned NumElts = VT.getVectorNumElements();
8010 for (unsigned i = 0; i < NumElts; ++i) {
8011 if (M[i] < 0) continue; // ignore UNDEF indices
8012 if ((unsigned) M[i] != NumElts - 1 - i)
8013 return false;
8014 }
8015
8016 return true;
8017}
8018
8019static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8020 for (auto *U : StoredVal->users()) {
8021 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8022 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8023 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8024 continue;
8025 } else if (isa<BuildVectorSDNode>(U)) {
8026 SDValue BuildVector = SDValue(U, 0);
8027 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8028 isOnlyUsedByStores(BuildVector, DAG))
8029 continue;
8030 }
8031 return false;
8032 }
8033 return true;
8034}
8035
8036static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8037 SDValue &HiPart) {
8038 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8039 return false;
8040
8041 SDValue Op0 = Val.getOperand(0);
8042 SDValue Op1 = Val.getOperand(1);
8043
8044 if (Op0.getOpcode() == ISD::SHL)
8045 std::swap(Op0, Op1);
8046 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8047 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8048 Op1.getConstantOperandVal(1) != 64)
8049 return false;
8050 Op1 = Op1.getOperand(0);
8051
8052 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8053 Op0.getOperand(0).getValueType() != MVT::i64)
8054 return false;
8055 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8056 Op1.getOperand(0).getValueType() != MVT::i64)
8057 return false;
8058
8059 LoPart = Op0.getOperand(0);
8060 HiPart = Op1.getOperand(0);
8061 return true;
8062}
8063
8064static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8065 SDValue &HiPart) {
8066 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8067 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8068 return false;
8069
8070 if (Val->getNumOperands() != 5 ||
8071 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8072 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8073 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8074 return false;
8075
8076 LoPart = Val->getOperand(1);
8077 HiPart = Val->getOperand(3);
8078 return true;
8079}
8080
8081SDValue SystemZTargetLowering::combineSTORE(
8082 SDNode *N, DAGCombinerInfo &DCI) const {
8083 SelectionDAG &DAG = DCI.DAG;
8084 auto *SN = cast<StoreSDNode>(N);
8085 auto &Op1 = N->getOperand(1);
8086 EVT MemVT = SN->getMemoryVT();
8087
8088 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8089 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8090 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8091 if (PtrVT != StoreNodeVT) {
8092 SDLoc DL(SN);
8093 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8094 SYSTEMZAS::PTR32, 0);
8095 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8096 SN->getPointerInfo(), SN->getBaseAlign(),
8097 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8098 }
8099 }
8100
8101 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8102 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8103 // If X has wider elements then convert it to:
8104 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8105 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8106 if (SDValue Value =
8107 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8108 DCI.AddToWorklist(Value.getNode());
8109
8110 // Rewrite the store with the new form of stored value.
8111 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8112 SN->getBasePtr(), SN->getMemoryVT(),
8113 SN->getMemOperand());
8114 }
8115 }
8116 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8117 if (!SN->isTruncatingStore() &&
8118 Op1.getOpcode() == ISD::BSWAP &&
8119 Op1.getNode()->hasOneUse() &&
8120 canLoadStoreByteSwapped(Op1.getValueType())) {
8121
8122 SDValue BSwapOp = Op1.getOperand(0);
8123
8124 if (BSwapOp.getValueType() == MVT::i16)
8125 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8126
8127 SDValue Ops[] = {
8128 N->getOperand(0), BSwapOp, N->getOperand(2)
8129 };
8130
8131 return
8132 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8133 Ops, MemVT, SN->getMemOperand());
8134 }
8135 // Combine STORE (element-swap) into VSTER
8136 if (!SN->isTruncatingStore() &&
8137 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8138 Op1.getNode()->hasOneUse() &&
8139 Subtarget.hasVectorEnhancements2()) {
8140 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8141 ArrayRef<int> ShuffleMask = SVN->getMask();
8142 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8143 SDValue Ops[] = {
8144 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8145 };
8146
8147 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8148 DAG.getVTList(MVT::Other),
8149 Ops, MemVT, SN->getMemOperand());
8150 }
8151 }
8152
8153 // Combine STORE (READCYCLECOUNTER) into STCKF.
8154 if (!SN->isTruncatingStore() &&
8156 Op1.hasOneUse() &&
8157 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8158 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8159 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8160 DAG.getVTList(MVT::Other),
8161 Ops, MemVT, SN->getMemOperand());
8162 }
8163
8164 // Transform a store of a 128-bit value moved from parts into two stores.
8165 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8166 SDValue LoPart, HiPart;
8167 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8168 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8169 SDLoc DL(SN);
8170 SDValue Chain0 = DAG.getStore(
8171 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8172 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8173 SDValue Chain1 = DAG.getStore(
8174 SN->getChain(), DL, LoPart,
8175 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8176 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8177 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8178
8179 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8180 }
8181 }
8182
8183 // Replicate a reg or immediate with VREP instead of scalar multiply or
8184 // immediate load. It seems best to do this during the first DAGCombine as
8185 // it is straight-forward to handle the zero-extend node in the initial
8186 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
8187 // extracting an i16 element from a v16i8 vector).
8188 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8189 isOnlyUsedByStores(Op1, DAG)) {
8190 SDValue Word = SDValue();
8191 EVT WordVT;
8192
8193 // Find a replicated immediate and return it if found in Word and its
8194 // type in WordVT.
8195 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8196 // Some constants are better handled with a scalar store.
8197 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8198 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8199 return;
8200
8201 APInt Val = C->getAPIntValue();
8202 // Truncate Val in case of a truncating store.
8203 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8204 assert(SN->isTruncatingStore() &&
8205 "Non-truncating store and immediate value does not fit?");
8206 Val = Val.trunc(TotBytes * 8);
8207 }
8208
8209 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8210 if (VCI.isVectorConstantLegal(Subtarget) &&
8211 VCI.Opcode == SystemZISD::REPLICATE) {
8212 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8213 WordVT = VCI.VecVT.getScalarType();
8214 }
8215 };
8216
8217 // Find a replicated register and return it if found in Word and its type
8218 // in WordVT.
8219 auto FindReplicatedReg = [&](SDValue MulOp) {
8220 EVT MulVT = MulOp.getValueType();
8221 if (MulOp->getOpcode() == ISD::MUL &&
8222 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8223 // Find a zero extended value and its type.
8224 SDValue LHS = MulOp->getOperand(0);
8225 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8226 WordVT = LHS->getOperand(0).getValueType();
8227 else if (LHS->getOpcode() == ISD::AssertZext)
8228 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8229 else
8230 return;
8231 // Find a replicating constant, e.g. 0x00010001.
8232 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8233 SystemZVectorConstantInfo VCI(
8234 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8235 if (VCI.isVectorConstantLegal(Subtarget) &&
8236 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8237 WordVT == VCI.VecVT.getScalarType())
8238 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8239 }
8240 }
8241 };
8242
8243 if (isa<BuildVectorSDNode>(Op1) &&
8244 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8245 SDValue SplatVal = Op1->getOperand(0);
8246 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8247 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8248 else
8249 FindReplicatedReg(SplatVal);
8250 } else {
8251 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8252 FindReplicatedImm(C, MemVT.getStoreSize());
8253 else
8254 FindReplicatedReg(Op1);
8255 }
8256
8257 if (Word != SDValue()) {
8258 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8259 "Bad type handling");
8260 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8261 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8262 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8263 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8264 SN->getBasePtr(), SN->getMemOperand());
8265 }
8266 }
8267
8268 return SDValue();
8269}
8270
// Try to replace an element-swapping shuffle of a single-use, non-extending
// load with one element-swapping load (VLER).
SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, ESLoad);

      // Next, combine the load away, we give it a bogus result value but a
      // real chain result.  The result value is dead because the shuffle is
      // dead.
      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
8309
// Simplify (extract_vector_elt X, Y): look through element-count-preserving
// bitcasts, pull a BSWAP out of the extraction, and try combineExtract()
// for constant indices.
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // All of these simplifications target vector-facility instructions.
  if (!Subtarget.hasVector())
    return SDValue();

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
        Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Pull BSWAP out of a vector extraction.
  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
    EVT VecVT = Op.getValueType();
    EVT EltVT = VecVT.getVectorElementType();
    // Extract the element first, then byte-swap the scalar.
    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
                     Op.getOperand(0), N->getOperand(1));
    DCI.AddToWorklist(Op.getNode());
    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
    // Bitcast back if looking through the bitcast above changed the
    // element type relative to N's result type.
    if (EltVT != N->getValueType(0)) {
      DCI.AddToWorklist(Op.getNode());
      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
    }
    return Op;
  }

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    if (canTreatAsByteVector(VecVT))
      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                            IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}
8351
8352SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8353 SDNode *N, DAGCombinerInfo &DCI) const {
8354 SelectionDAG &DAG = DCI.DAG;
8355 // (join_dwords X, X) == (replicate X)
8356 if (N->getOperand(0) == N->getOperand(1))
8357 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8358 N->getOperand(0));
8359 return SDValue();
8360}
8361
8363 SDValue Chain1 = N1->getOperand(0);
8364 SDValue Chain2 = N2->getOperand(0);
8365
8366 // Trivial case: both nodes take the same chain.
8367 if (Chain1 == Chain2)
8368 return Chain1;
8369
8370 // FIXME - we could handle more complex cases via TokenFactor,
8371 // assuming we can verify that this would not create a cycle.
8372 return SDValue();
8373}
8374
8375SDValue SystemZTargetLowering::combineFP_ROUND(
8376 SDNode *N, DAGCombinerInfo &DCI) const {
8377
8378 if (!Subtarget.hasVector())
8379 return SDValue();
8380
8381 // (fpround (extract_vector_elt X 0))
8382 // (fpround (extract_vector_elt X 1)) ->
8383 // (extract_vector_elt (VROUND X) 0)
8384 // (extract_vector_elt (VROUND X) 2)
8385 //
8386 // This is a special case since the target doesn't really support v2f32s.
8387 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8388 SelectionDAG &DAG = DCI.DAG;
8389 SDValue Op0 = N->getOperand(OpNo);
8390 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8392 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8393 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8394 Op0.getConstantOperandVal(1) == 0) {
8395 SDValue Vec = Op0.getOperand(0);
8396 for (auto *U : Vec->users()) {
8397 if (U != Op0.getNode() && U->hasOneUse() &&
8398 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8399 U->getOperand(0) == Vec &&
8400 U->getOperand(1).getOpcode() == ISD::Constant &&
8401 U->getConstantOperandVal(1) == 1) {
8402 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8403 if (OtherRound.getOpcode() == N->getOpcode() &&
8404 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8405 OtherRound.getValueType() == MVT::f32) {
8406 SDValue VRound, Chain;
8407 if (N->isStrictFPOpcode()) {
8408 Chain = MergeInputChains(N, OtherRound.getNode());
8409 if (!Chain)
8410 continue;
8411 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8412 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8413 Chain = VRound.getValue(1);
8414 } else
8415 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8416 MVT::v4f32, Vec);
8417 DCI.AddToWorklist(VRound.getNode());
8418 SDValue Extract1 =
8419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8420 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8421 DCI.AddToWorklist(Extract1.getNode());
8422 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8423 if (Chain)
8424 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8425 SDValue Extract0 =
8426 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8427 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8428 if (Chain)
8429 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8430 N->getVTList(), Extract0, Chain);
8431 return Extract0;
8432 }
8433 }
8434 }
8435 }
8436 return SDValue();
8437}
8438
8439SDValue SystemZTargetLowering::combineFP_EXTEND(
8440 SDNode *N, DAGCombinerInfo &DCI) const {
8441
8442 if (!Subtarget.hasVector())
8443 return SDValue();
8444
8445 // (fpextend (extract_vector_elt X 0))
8446 // (fpextend (extract_vector_elt X 2)) ->
8447 // (extract_vector_elt (VEXTEND X) 0)
8448 // (extract_vector_elt (VEXTEND X) 1)
8449 //
8450 // This is a special case since the target doesn't really support v2f32s.
8451 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8452 SelectionDAG &DAG = DCI.DAG;
8453 SDValue Op0 = N->getOperand(OpNo);
8454 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8456 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8457 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8458 Op0.getConstantOperandVal(1) == 0) {
8459 SDValue Vec = Op0.getOperand(0);
8460 for (auto *U : Vec->users()) {
8461 if (U != Op0.getNode() && U->hasOneUse() &&
8462 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8463 U->getOperand(0) == Vec &&
8464 U->getOperand(1).getOpcode() == ISD::Constant &&
8465 U->getConstantOperandVal(1) == 2) {
8466 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8467 if (OtherExtend.getOpcode() == N->getOpcode() &&
8468 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8469 OtherExtend.getValueType() == MVT::f64) {
8470 SDValue VExtend, Chain;
8471 if (N->isStrictFPOpcode()) {
8472 Chain = MergeInputChains(N, OtherExtend.getNode());
8473 if (!Chain)
8474 continue;
8475 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8476 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8477 Chain = VExtend.getValue(1);
8478 } else
8479 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8480 MVT::v2f64, Vec);
8481 DCI.AddToWorklist(VExtend.getNode());
8482 SDValue Extract1 =
8483 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8484 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8485 DCI.AddToWorklist(Extract1.getNode());
8486 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8487 if (Chain)
8488 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8489 SDValue Extract0 =
8490 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8491 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8492 if (Chain)
8493 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8494 N->getVTList(), Extract0, Chain);
8495 return Extract0;
8496 }
8497 }
8498 }
8499 }
8500 return SDValue();
8501}
8502
8503SDValue SystemZTargetLowering::combineINT_TO_FP(
8504 SDNode *N, DAGCombinerInfo &DCI) const {
8505 if (DCI.Level != BeforeLegalizeTypes)
8506 return SDValue();
8507 SelectionDAG &DAG = DCI.DAG;
8508 LLVMContext &Ctx = *DAG.getContext();
8509 unsigned Opcode = N->getOpcode();
8510 EVT OutVT = N->getValueType(0);
8511 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8512 SDValue Op = N->getOperand(0);
8513 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8514 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8515
8516 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8517 // v2f64 = uint_to_fp v2i16
8518 // =>
8519 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8520 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8521 OutScalarBits <= 64) {
8522 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8523 EVT ExtVT = EVT::getVectorVT(
8524 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8525 unsigned ExtOpcode =
8527 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8528 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8529 }
8530 return SDValue();
8531}
8532
8533SDValue SystemZTargetLowering::combineFCOPYSIGN(
8534 SDNode *N, DAGCombinerInfo &DCI) const {
8535 SelectionDAG &DAG = DCI.DAG;
8536 EVT VT = N->getValueType(0);
8537 SDValue ValOp = N->getOperand(0);
8538 SDValue SignOp = N->getOperand(1);
8539
8540 // Remove the rounding which is not needed.
8541 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8542 SDValue WideOp = SignOp.getOperand(0);
8543 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8544 }
8545
8546 return SDValue();
8547}
8548
8549SDValue SystemZTargetLowering::combineBSWAP(
8550 SDNode *N, DAGCombinerInfo &DCI) const {
8551 SelectionDAG &DAG = DCI.DAG;
8552 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8553 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8554 N->getOperand(0).hasOneUse() &&
8555 canLoadStoreByteSwapped(N->getValueType(0))) {
8556 SDValue Load = N->getOperand(0);
8557 LoadSDNode *LD = cast<LoadSDNode>(Load);
8558
8559 // Create the byte-swapping load.
8560 SDValue Ops[] = {
8561 LD->getChain(), // Chain
8562 LD->getBasePtr() // Ptr
8563 };
8564 EVT LoadVT = N->getValueType(0);
8565 if (LoadVT == MVT::i16)
8566 LoadVT = MVT::i32;
8567 SDValue BSLoad =
8568 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8569 DAG.getVTList(LoadVT, MVT::Other),
8570 Ops, LD->getMemoryVT(), LD->getMemOperand());
8571
8572 // If this is an i16 load, insert the truncate.
8573 SDValue ResVal = BSLoad;
8574 if (N->getValueType(0) == MVT::i16)
8575 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8576
8577 // First, combine the bswap away. This makes the value produced by the
8578 // load dead.
8579 DCI.CombineTo(N, ResVal);
8580
8581 // Next, combine the load away, we give it a bogus result value but a real
8582 // chain result. The result value is dead because the bswap is dead.
8583 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8584
8585 // Return N so it doesn't get rechecked!
8586 return SDValue(N, 0);
8587 }
8588
8589 // Look through bitcasts that retain the number of vector elements.
8590 SDValue Op = N->getOperand(0);
8591 if (Op.getOpcode() == ISD::BITCAST &&
8592 Op.getValueType().isVector() &&
8593 Op.getOperand(0).getValueType().isVector() &&
8594 Op.getValueType().getVectorNumElements() ==
8595 Op.getOperand(0).getValueType().getVectorNumElements())
8596 Op = Op.getOperand(0);
8597
8598 // Push BSWAP into a vector insertion if at least one side then simplifies.
8599 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8600 SDValue Vec = Op.getOperand(0);
8601 SDValue Elt = Op.getOperand(1);
8602 SDValue Idx = Op.getOperand(2);
8603
8605 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8607 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8608 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8609 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8610 EVT VecVT = N->getValueType(0);
8611 EVT EltVT = N->getValueType(0).getVectorElementType();
8612 if (VecVT != Vec.getValueType()) {
8613 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8614 DCI.AddToWorklist(Vec.getNode());
8615 }
8616 if (EltVT != Elt.getValueType()) {
8617 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8618 DCI.AddToWorklist(Elt.getNode());
8619 }
8620 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8621 DCI.AddToWorklist(Vec.getNode());
8622 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8623 DCI.AddToWorklist(Elt.getNode());
8624 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8625 Vec, Elt, Idx);
8626 }
8627 }
8628
8629 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8630 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8631 if (SV && Op.hasOneUse()) {
8632 SDValue Op0 = Op.getOperand(0);
8633 SDValue Op1 = Op.getOperand(1);
8634
8636 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8638 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8639 EVT VecVT = N->getValueType(0);
8640 if (VecVT != Op0.getValueType()) {
8641 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8642 DCI.AddToWorklist(Op0.getNode());
8643 }
8644 if (VecVT != Op1.getValueType()) {
8645 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8646 DCI.AddToWorklist(Op1.getNode());
8647 }
8648 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8649 DCI.AddToWorklist(Op0.getNode());
8650 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8651 DCI.AddToWorklist(Op1.getNode());
8652 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8653 }
8654 }
8655
8656 return SDValue();
8657}
8658
8659SDValue SystemZTargetLowering::combineSETCC(
8660 SDNode *N, DAGCombinerInfo &DCI) const {
8661 SelectionDAG &DAG = DCI.DAG;
8662 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8663 const SDValue LHS = N->getOperand(0);
8664 const SDValue RHS = N->getOperand(1);
8665 bool CmpNull = isNullConstant(RHS);
8666 bool CmpAllOnes = isAllOnesConstant(RHS);
8667 EVT VT = N->getValueType(0);
8668 SDLoc DL(N);
8669
8670 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8671 // change the outer compare to a i128 compare. This will normally
8672 // allow the reduction to be recognized in adjustICmp128, and even if
8673 // not, the i128 compare will still generate better code.
8674 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8676 if (Src.getOpcode() == ISD::SETCC &&
8677 Src.getValueType().isFixedLengthVector() &&
8678 Src.getValueType().getScalarType() == MVT::i1) {
8679 EVT CmpVT = Src.getOperand(0).getValueType();
8680 if (CmpVT.getSizeInBits() == 128) {
8681 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8682 SDValue LHS =
8683 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8684 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8685 : DAG.getAllOnesConstant(DL, MVT::i128);
8686 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8687 N->getFlags());
8688 }
8689 }
8690 }
8691
8692 return SDValue();
8693}
8694
// Trace Val back to a producer of the CC register, looking through IPM,
// SELECT_CCMASK and the arithmetic/logical/shift nodes handled by
// simplifyAssumingCCVal.
//
// Returns the CC-producing value together with the mask of CC values it
// can produce (a SystemZ::CCMASK_* set), or (SDValue(), CCMASK_NONE) when
// no CC producer is found.
static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
  switch (Val.getOpcode()) {
  default:
    return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
  case SystemZISD::IPM:
    // IPM reads CC directly; CLC/STRCMP producers only generate the
    // integer-compare subset of CC values.
    if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
        Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
      return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
    return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
  case SystemZISD::SELECT_CCMASK: {
    SDValue Op4CCReg = Val.getOperand(4);
    // If the select's CC input is itself an ICMP/TM, recurse into the
    // compared value to find the original CC producer.
    if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
        Op4CCReg.getOpcode() == SystemZISD::TM) {
      auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
      if (OpCC != SDValue())
        return std::make_pair(OpCC, OpCCValid);
    }
    // Otherwise use the select's own CC input, valid per its CCValid
    // operand (which must be a constant).
    auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
    if (!CCValid)
      return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
    int CCValidVal = CCValid->getZExtValue();
    return std::make_pair(Op4CCReg, CCValidVal);
  }
  case ISD::ADD:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    // Binary operations: prefer a CC use found via the first operand,
    // otherwise try the second.
    auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
    if (Op0CC != SDValue())
      return std::make_pair(Op0CC, Op0CCValid);
    return findCCUse(Val.getOperand(1));
  }
}
8731
8732static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8733 SelectionDAG &DAG);
8734
8736 SelectionDAG &DAG) {
8737 SDLoc DL(Val);
8738 auto Opcode = Val.getOpcode();
8739 switch (Opcode) {
8740 default:
8741 return {};
8742 case ISD::Constant:
8743 return {Val, Val, Val, Val};
8744 case SystemZISD::IPM: {
8745 SDValue IPMOp0 = Val.getOperand(0);
8746 if (IPMOp0 != CC)
8747 return {};
8748 SmallVector<SDValue, 4> ShiftedCCVals;
8749 for (auto CC : {0, 1, 2, 3})
8750 ShiftedCCVals.emplace_back(
8751 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8752 return ShiftedCCVals;
8753 }
8754 case SystemZISD::SELECT_CCMASK: {
8755 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8756 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8757 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8758 if (!CCValid || !CCMask)
8759 return {};
8760
8761 int CCValidVal = CCValid->getZExtValue();
8762 int CCMaskVal = CCMask->getZExtValue();
8763 // Pruning search tree early - Moving CC test and combineCCMask ahead of
8764 // recursive call to simplifyAssumingCCVal.
8765 SDValue Op4CCReg = Val.getOperand(4);
8766 if (Op4CCReg != CC)
8767 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8768 if (Op4CCReg != CC)
8769 return {};
8770 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8771 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8772 if (TrueSDVals.empty() || FalseSDVals.empty())
8773 return {};
8774 SmallVector<SDValue, 4> MergedSDVals;
8775 for (auto &CCVal : {0, 1, 2, 3})
8776 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8777 ? TrueSDVals[CCVal]
8778 : FalseSDVals[CCVal]);
8779 return MergedSDVals;
8780 }
8781 case ISD::ADD:
8782 case ISD::AND:
8783 case ISD::OR:
8784 case ISD::XOR:
8785 case ISD::SRA:
8786 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8787 // would clobber CC).
8788 if (!Val.hasOneUse())
8789 return {};
8790 [[fallthrough]];
8791 case ISD::SHL:
8792 case ISD::SRL:
8793 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8794 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8795 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8796 if (Op0SDVals.empty() || Op1SDVals.empty())
8797 return {};
8798 SmallVector<SDValue, 4> BinaryOpSDVals;
8799 for (auto CCVal : {0, 1, 2, 3})
8800 BinaryOpSDVals.emplace_back(DAG.getNode(
8801 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8802 return BinaryOpSDVals;
8803 }
8804}
8805
// Attempt to rewrite a (CCReg, CCValid, CCMask) triple so that it tests the
// CC producer that feeds CCReg directly, instead of an intermediate
// ICMP / TM of a CC-derived value.  On success CCReg/CCValid/CCMask are
// updated in place and true is returned; on failure nothing is modified
// and false is returned.
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
                          SelectionDAG &DAG) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks,
  // If the CCReg instruction is itself a ICMP / TM testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.
  auto *CCNode = CCReg.getNode();
  if (!CCNode)
    return false;

  if (CCNode->getOpcode() == SystemZISD::TM) {
    if (CCValid != SystemZ::CCMASK_TM)
      return false;
    // Compute the CC value a TEST UNDER MASK of the two constants would
    // produce: 0 = all selected bits zero, 3 = all ones, 2 = mixed with
    // leftmost selected bit set, 1 = mixed with leftmost selected bit
    // clear.  Returns -1 if either operand is not a constant.
    auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
      if (!Op0Node || !Op1Node)
        return -1;
      auto Op0APVal = Op0Node->getAPIntValue();
      auto Op1APVal = Op1Node->getAPIntValue();
      auto Result = Op0APVal & Op1APVal;
      bool AllOnes = Result == Op1APVal;
      bool AllZeros = Result == 0;
      bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
      return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
    };
    SDValue Op0 = CCNode->getOperand(0);
    SDValue Op1 = CCNode->getOperand(1);
    auto [Op0CC, Op0CCValid] = findCCUse(Op0);
    if (Op0CC == SDValue())
      return false;
    // Evaluate both TM operands for each possible CC value of the
    // underlying producer.
    const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
    const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
    if (Op0SDVals.empty() || Op1SDVals.empty())
      return false;
    // Build the new mask: for each producer CC value, test whether the
    // emulated TM result is selected by the original CCMask.
    int NewCCMask = 0;
    for (auto CC : {0, 1, 2, 3}) {
      auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
      if (CCVal < 0)
        return false;
      NewCCMask <<= 1;
      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
    }
    NewCCMask &= Op0CCValid;
    CCReg = Op0CC;
    CCMask = NewCCMask;
    CCValid = Op0CCValid;
    return true;
  }
  // The remaining case handles an integer comparison of CC-derived values.
  if (CCNode->getOpcode() != SystemZISD::ICMP ||
      CCValid != SystemZ::CCMASK_ICMP)
    return false;

  SDValue CmpOp0 = CCNode->getOperand(0);
  SDValue CmpOp1 = CCNode->getOperand(1);
  SDValue CmpOp2 = CCNode->getOperand(2);
  auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
  if (Op0CC != SDValue()) {
    const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
    const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
    if (Op0SDVals.empty() || Op1SDVals.empty())
      return false;

    // Emulate the ICMP for constant operands: 0 = equal, 1 = less,
    // 2 = greater (signed or unsigned per the comparison type operand);
    // -1 if either value is not a constant.
    auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
    auto CmpTypeVal = CmpType->getZExtValue();
    const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
                                               const SDValue &Op1Val) {
      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
      if (!Op0Node || !Op1Node)
        return -1;
      auto Op0APVal = Op0Node->getAPIntValue();
      auto Op1APVal = Op1Node->getAPIntValue();
      if (CmpTypeVal == SystemZICMP::SignedOnly)
        return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
      return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
    };
    // Build the new mask in terms of the producer's CC values.
    int NewCCMask = 0;
    for (auto CC : {0, 1, 2, 3}) {
      auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
      if (CCVal < 0)
        return false;
      NewCCMask <<= 1;
      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
    }
    NewCCMask &= Op0CCValid;
    CCMask = NewCCMask;
    CCReg = Op0CC;
    CCValid = Op0CCValid;
    return true;
  }

  return false;
}
8901
8902// Merging versus split in multiple branches cost.
8905 const Value *Lhs,
8906 const Value *Rhs) const {
8907 const auto isFlagOutOpCC = [](const Value *V) {
8908 using namespace llvm::PatternMatch;
8909 const Value *RHSVal;
8910 const APInt *RHSC;
8911 if (const auto *I = dyn_cast<Instruction>(V)) {
8912 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8913 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8914 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8915 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8916 if (CB->isInlineAsm()) {
8917 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8918 return IA && IA->getConstraintString().contains("{@cc}");
8919 }
8920 }
8921 }
8922 }
8923 return false;
8924 };
8925 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8926 // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
8927 // BaseCost can be set >=2. If cost of instruction <= CostThreshold
8928 // conditionals will be merged or else conditionals will be split.
8929 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8930 return {3, 0, -1};
8931 // Default.
8932 return {-1, -1, -1};
8933}
8934
// DAG combine for SystemZISD::BR_CCMASK.
//
// Operands read below: 0 = chain, 1 = CCValid, 2 = CCMask, 4 = CC
// producer; operand 3 is forwarded unchanged to the new node.
SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);
  // If combineCMask was able to merge or simplify ccvalid or ccmask, re-emit
  // the modified BR_CCMASK with the new values.
  // In order to avoid conditional branches with full or empty cc masks, do not
  // do this if ccmask is 0 or equal to ccvalid.
  if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
      CCMaskVal != CCValidVal)
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}
8962
8963SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8964 SDNode *N, DAGCombinerInfo &DCI) const {
8965 SelectionDAG &DAG = DCI.DAG;
8966
8967 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8968 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8969 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8970 if (!CCValid || !CCMask)
8971 return SDValue();
8972
8973 int CCValidVal = CCValid->getZExtValue();
8974 int CCMaskVal = CCMask->getZExtValue();
8975 SDValue CCReg = N->getOperand(4);
8976
8977 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8978
8979 // Populate SDVals vector for each condition code ccval for given Val, which
8980 // can again be another nested select_ccmask with the same CC.
8981 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8982 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8984 if (Val.getOperand(4) != CCReg)
8985 return SmallVector<SDValue, 4>{};
8986 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8987 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8988 if (!CCMask)
8989 return SmallVector<SDValue, 4>{};
8990
8991 int CCMaskVal = CCMask->getZExtValue();
8992 for (auto &CC : {0, 1, 2, 3})
8993 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
8994 : FalseVal);
8995 return Res;
8996 }
8997 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
8998 };
8999 // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
9000 // with CCReg found by combineCCMask or original CCReg.
9001 SDValue TrueVal = N->getOperand(0);
9002 SDValue FalseVal = N->getOperand(1);
9003 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9004 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9005 // TrueSDVals/FalseSDVals might be empty in case of non-constant
9006 // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
9007 if (TrueSDVals.empty())
9008 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9009 if (FalseSDVals.empty())
9010 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9011 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9012 SmallSet<SDValue, 4> MergedSDValsSet;
9013 // Ignoring CC values outside CCValiid.
9014 for (auto CC : {0, 1, 2, 3}) {
9015 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9016 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9017 ? TrueSDVals[CC]
9018 : FalseSDVals[CC]);
9019 }
9020 if (MergedSDValsSet.size() == 1)
9021 return *MergedSDValsSet.begin();
9022 if (MergedSDValsSet.size() == 2) {
9023 auto BeginIt = MergedSDValsSet.begin();
9024 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
9025 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9026 std::swap(NewTrueVal, NewFalseVal);
9027 int NewCCMask = 0;
9028 for (auto CC : {0, 1, 2, 3}) {
9029 NewCCMask <<= 1;
9030 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9031 ? (TrueSDVals[CC] == NewTrueVal)
9032 : (FalseSDVals[CC] == NewTrueVal);
9033 }
9034 CCMaskVal = NewCCMask;
9035 CCMaskVal &= CCValidVal;
9036 TrueVal = NewTrueVal;
9037 FalseVal = NewFalseVal;
9038 IsCombinedCCReg = true;
9039 }
9040 }
9041 // If the condition is trivially false or trivially true after
9042 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
9043 // (possibly modified by constructCCSDValsFromSELECT).
9044 if (CCMaskVal == 0)
9045 return FalseVal;
9046 if (CCMaskVal == CCValidVal)
9047 return TrueVal;
9048
9049 if (IsCombinedCCReg)
9050 return DAG.getNode(
9051 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9052 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9053 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9054
9055 return SDValue();
9056}
9057
// DAG combine for SystemZISD::GET_CCMASK.
//
// When the operand is a SELECT_CCMASK that merely materializes 1/0
// (or the inverted 0/1) from CC, and the masks are compatible, the
// GET_CCMASK can read the select's CC register directly.
SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  // The select result may have been truncated; look through that.
  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() == ISD::TRUNCATE)
    Select = Select->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  // Only a select of constants 1/0 can be folded; the inverted 0/1 form is
  // handled by inverting the select's mask within its valid bits.
  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  // The select must not consider CC bits outside those the GET_CCMASK
  // treats as valid, and both masks must agree on the select's valid bits.
  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  // Use the CC register feeding the select directly.
  return Select->getOperand(4);
}
9100
9101SDValue SystemZTargetLowering::combineIntDIVREM(
9102 SDNode *N, DAGCombinerInfo &DCI) const {
9103 SelectionDAG &DAG = DCI.DAG;
9104 EVT VT = N->getValueType(0);
9105 // In the case where the divisor is a vector of constants a cheaper
9106 // sequence of instructions can replace the divide. BuildSDIV is called to
9107 // do this during DAG combining, but it only succeeds when it can build a
9108 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9109 // since it is not Legal but Custom it can only happen before
9110 // legalization. Therefore we must scalarize this early before Combine
9111 // 1. For widened vectors, this is already the result of type legalization.
9112 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9113 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9114 return DAG.UnrollVectorOp(N);
9115 return SDValue();
9116}
9117
9118
9119// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9120// This is closely modeled after the common-code combineShiftToMULH.
9121SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9122 SDNode *N, DAGCombinerInfo &DCI) const {
9123 SelectionDAG &DAG = DCI.DAG;
9124 SDLoc DL(N);
9125
9126 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9127 "SRL or SRA node is required here!");
9128
9129 if (!Subtarget.hasVector())
9130 return SDValue();
9131
9132 // Check the shift amount. Proceed with the transformation if the shift
9133 // amount is constant.
9134 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9135 if (!ShiftAmtSrc)
9136 return SDValue();
9137
9138 // The operation feeding into the shift must be an add.
9139 SDValue ShiftOperand = N->getOperand(0);
9140 if (ShiftOperand.getOpcode() != ISD::ADD)
9141 return SDValue();
9142
9143 // One operand of the add must be a multiply.
9144 SDValue MulOp = ShiftOperand.getOperand(0);
9145 SDValue AddOp = ShiftOperand.getOperand(1);
9146 if (MulOp.getOpcode() != ISD::MUL) {
9147 if (AddOp.getOpcode() != ISD::MUL)
9148 return SDValue();
9149 std::swap(MulOp, AddOp);
9150 }
9151
9152 // All operands must be equivalent extend nodes.
9153 SDValue LeftOp = MulOp.getOperand(0);
9154 SDValue RightOp = MulOp.getOperand(1);
9155
9156 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9157 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9158
9159 if (!IsSignExt && !IsZeroExt)
9160 return SDValue();
9161
9162 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9163 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9164
9165 SDValue MulhRightOp;
9166 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9167 unsigned ActiveBits = IsSignExt
9168 ? Constant->getAPIntValue().getSignificantBits()
9169 : Constant->getAPIntValue().getActiveBits();
9170 if (ActiveBits > NarrowVTSize)
9171 return SDValue();
9172 MulhRightOp = DAG.getConstant(
9173 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9174 NarrowVT);
9175 } else {
9176 if (LeftOp.getOpcode() != RightOp.getOpcode())
9177 return SDValue();
9178 // Check that the two extend nodes are the same type.
9179 if (NarrowVT != RightOp.getOperand(0).getValueType())
9180 return SDValue();
9181 MulhRightOp = RightOp.getOperand(0);
9182 }
9183
9184 SDValue MulhAddOp;
9185 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9186 unsigned ActiveBits = IsSignExt
9187 ? Constant->getAPIntValue().getSignificantBits()
9188 : Constant->getAPIntValue().getActiveBits();
9189 if (ActiveBits > NarrowVTSize)
9190 return SDValue();
9191 MulhAddOp = DAG.getConstant(
9192 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9193 NarrowVT);
9194 } else {
9195 if (LeftOp.getOpcode() != AddOp.getOpcode())
9196 return SDValue();
9197 // Check that the two extend nodes are the same type.
9198 if (NarrowVT != AddOp.getOperand(0).getValueType())
9199 return SDValue();
9200 MulhAddOp = AddOp.getOperand(0);
9201 }
9202
9203 EVT WideVT = LeftOp.getValueType();
9204 // Proceed with the transformation if the wide types match.
9205 assert((WideVT == RightOp.getValueType()) &&
9206 "Cannot have a multiply node with two different operand types.");
9207 assert((WideVT == AddOp.getValueType()) &&
9208 "Cannot have an add node with two different operand types.");
9209
9210 // Proceed with the transformation if the wide type is twice as large
9211 // as the narrow type.
9212 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9213 return SDValue();
9214
9215 // Check the shift amount with the narrow type size.
9216 // Proceed with the transformation if the shift amount is the width
9217 // of the narrow type.
9218 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9219 if (ShiftAmt != NarrowVTSize)
9220 return SDValue();
9221
9222 // Proceed if we support the multiply-and-add-high operation.
9223 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9224 NarrowVT == MVT::v4i32 ||
9225 (Subtarget.hasVectorEnhancements3() &&
9226 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9227 return SDValue();
9228
9229 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9230 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9231 DL, NarrowVT, LeftOp.getOperand(0),
9232 MulhRightOp, MulhAddOp);
9233 bool IsSigned = N->getOpcode() == ISD::SRA;
9234 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9235}
9236
9237 // Op is an operand of a multiplication. Check whether this can be folded
9238 // into an even/odd widening operation; if so, return the opcode to be used
9239 // and update Op to the appropriate sub-operand. Note that the caller must
9240 // verify that *both* operands of the multiplication support the operation.
// Returns 0 when no even/odd widening form applies.
9242 const SystemZSubtarget &Subtarget,
9243 SDValue &Op) {
9244 EVT VT = Op.getValueType();
9245
9246 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9247 // to selecting the even or odd vector elements.
9248 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9249 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9250 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9251 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9252 unsigned NumElts = VT.getVectorNumElements();
9253 Op = Op.getOperand(0);
9254 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9255 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9257 ArrayRef<int> ShuffleMask = SVN->getMask();
// A mask selecting 0,2,4,... picks the even elements; 1,3,5,... picks
// the odd elements. Undef (-1) entries are compatible with either.
9258 bool CanUseEven = true, CanUseOdd = true;
9259 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9260 if (ShuffleMask[Elt] == -1)
9261 continue;
9262 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9263 CanUseEven = false;
9264 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9265 CanUseOdd = false;
9266 }
9267 Op = Op.getOperand(0);
9268 if (CanUseEven)
9269 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9270 if (CanUseOdd)
9271 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9272 }
9273 }
9274
9275 // For z17, we can also support the v2i64->i128 case, which looks like
9276 // (sign/zero_extend (extract_vector_elt X 0/1))
9277 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9278 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9279 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9280 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9281 Op = Op.getOperand(0);
9282 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9283 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9284 Op.getOperand(1).getOpcode() == ISD::Constant) {
9285 unsigned Elem = Op.getConstantOperandVal(1);
9286 Op = Op.getOperand(0);
// Element 0 feeds the "even" form, element 1 the "odd" form.
9287 if (Elem == 0)
9288 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9289 if (Elem == 1)
9290 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9291 }
9292 }
9293
9294 return 0;
9295}
9296
9297SDValue SystemZTargetLowering::combineMUL(
9298 SDNode *N, DAGCombinerInfo &DCI) const {
9299 SelectionDAG &DAG = DCI.DAG;
9300
9301 // Detect even/odd widening multiplication.
9302 SDValue Op0 = N->getOperand(0);
9303 SDValue Op1 = N->getOperand(1);
9304 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9305 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9306 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9307 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9308
9309 return SDValue();
9310}
9311
9312SDValue SystemZTargetLowering::combineINTRINSIC(
9313 SDNode *N, DAGCombinerInfo &DCI) const {
9314 SelectionDAG &DAG = DCI.DAG;
9315
9316 unsigned Id = N->getConstantOperandVal(1);
9317 switch (Id) {
9318 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9319 // or larger is simply a vector load.
9320 case Intrinsic::s390_vll:
9321 case Intrinsic::s390_vlrl:
9322 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9323 if (C->getZExtValue() >= 15)
9324 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9325 N->getOperand(3), MachinePointerInfo());
9326 break;
9327 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9328 case Intrinsic::s390_vstl:
9329 case Intrinsic::s390_vstrl:
9330 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9331 if (C->getZExtValue() >= 15)
9332 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9333 N->getOperand(4), MachinePointerInfo());
9334 break;
9335 }
9336
9337 return SDValue();
9338}
9339
9340SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9341 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9342 return N->getOperand(0);
9343 return N;
9344}
9345
// Target hook: dispatch each DAG node to the opcode-specific combine helper.
// Returns the replacement value, or an empty SDValue if no combine applies.
9347 DAGCombinerInfo &DCI) const {
9348 switch(N->getOpcode()) {
9349 default: break;
9350 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9351 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9352 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9353 case SystemZISD::MERGE_HIGH:
9354 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9355 case ISD::LOAD: return combineLOAD(N, DCI);
9356 case ISD::STORE: return combineSTORE(N, DCI);
9357 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9358 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9359 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9361 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9363 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9364 case ISD::SINT_TO_FP:
9365 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9366 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9367 case ISD::BSWAP: return combineBSWAP(N, DCI);
9368 case ISD::SETCC: return combineSETCC(N, DCI);
9369 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9370 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9371 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9372 case ISD::SRL:
9373 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9374 case ISD::MUL: return combineMUL(N, DCI);
9375 case ISD::SDIV:
9376 case ISD::UDIV:
9377 case ISD::SREM:
9378 case ISD::UREM: return combineIntDIVREM(N, DCI);
9380 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9381 }
9382
9383 return SDValue();
9384}
9385
9386 // Return the demanded elements for the OpNo source operand of Op. DemandedElts
9387 // are for Op.
// Only opcodes handled by the switches below are supported; anything else
// is a programming error (llvm_unreachable).
9388 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9389 unsigned OpNo) {
9390 EVT VT = Op.getValueType();
9391 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9392 APInt SrcDemE;
9393 unsigned Opcode = Op.getOpcode();
9394 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9395 unsigned Id = Op.getConstantOperandVal(0);
9396 switch (Id) {
9397 case Intrinsic::s390_vpksh: // PACKS
9398 case Intrinsic::s390_vpksf:
9399 case Intrinsic::s390_vpksg:
9400 case Intrinsic::s390_vpkshs: // PACKS_CC
9401 case Intrinsic::s390_vpksfs:
9402 case Intrinsic::s390_vpksgs:
9403 case Intrinsic::s390_vpklsh: // PACKLS
9404 case Intrinsic::s390_vpklsf:
9405 case Intrinsic::s390_vpklsg:
9406 case Intrinsic::s390_vpklshs: // PACKLS_CC
9407 case Intrinsic::s390_vpklsfs:
9408 case Intrinsic::s390_vpklsgs:
9409 // VECTOR PACK truncates the elements of two source vectors into one.
// Operand 1 supplies the low half of the result mask, operand 2 the
// high half; keep only the half relevant to OpNo.
9410 SrcDemE = DemandedElts;
9411 if (OpNo == 2)
9412 SrcDemE.lshrInPlace(NumElts / 2);
9413 SrcDemE = SrcDemE.trunc(NumElts / 2);
9414 break;
9415 // VECTOR UNPACK extends half the elements of the source vector.
9416 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9417 case Intrinsic::s390_vuphh:
9418 case Intrinsic::s390_vuphf:
9419 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9420 case Intrinsic::s390_vuplhh:
9421 case Intrinsic::s390_vuplhf:
9422 SrcDemE = APInt(NumElts * 2, 0);
9423 SrcDemE.insertBits(DemandedElts, 0);
9424 break;
9425 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9426 case Intrinsic::s390_vuplhw:
9427 case Intrinsic::s390_vuplf:
9428 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9429 case Intrinsic::s390_vupllh:
9430 case Intrinsic::s390_vupllf:
9431 SrcDemE = APInt(NumElts * 2, 0);
9432 SrcDemE.insertBits(DemandedElts, NumElts);
9433 break;
9434 case Intrinsic::s390_vpdi: {
9435 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9436 SrcDemE = APInt(NumElts, 0);
9437 if (!DemandedElts[OpNo - 1])
9438 break;
9439 unsigned Mask = Op.getConstantOperandVal(3);
9440 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9441 // Demand input element 0 or 1, given by the mask bit value.
9442 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9443 break;
9444 }
9445 case Intrinsic::s390_vsldb: {
9446 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9447 assert(VT == MVT::v16i8 && "Unexpected type.");
9448 unsigned FirstIdx = Op.getConstantOperandVal(3);
9449 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.")ASSERT_PLACEHOLDER
9450 unsigned NumSrc0Els = 16 - FirstIdx;
9451 SrcDemE = APInt(NumElts, 0);
// Result bytes [0, NumSrc0Els) come from operand 1 starting at FirstIdx;
// the remaining bytes come from the start of operand 2.
9452 if (OpNo == 1) {
9453 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9454 SrcDemE.insertBits(DemEls, FirstIdx);
9455 } else {
9456 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9457 SrcDemE.insertBits(DemEls, 0);
9458 }
9459 break;
9460 }
9461 case Intrinsic::s390_vperm:
// VECTOR PERMUTE: any source element may be selected; be conservative.
9462 SrcDemE = APInt::getAllOnes(NumElts);
9463 break;
9464 default:
9465 llvm_unreachable("Unhandled intrinsic.");
9466 break;
9467 }
9468 } else {
9469 switch (Opcode) {
9470 case SystemZISD::JOIN_DWORDS:
9471 // Scalar operand.
9472 SrcDemE = APInt(1, 1);
9473 break;
9474 case SystemZISD::SELECT_CCMASK:
9475 SrcDemE = DemandedElts;
9476 break;
9477 default:
9478 llvm_unreachable("Unhandled opcode.");
9479 break;
9480 }
9481 }
9482 return SrcDemE;
9483}
9484
9485static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9486 const APInt &DemandedElts,
9487 const SelectionDAG &DAG, unsigned Depth,
9488 unsigned OpNo) {
9489 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9490 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9491 KnownBits LHSKnown =
9492 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9493 KnownBits RHSKnown =
9494 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9495 Known = LHSKnown.intersectWith(RHSKnown);
9496}
9497
// Target hook: compute known-zero/known-one bits for SystemZ-specific nodes
// and for the vector intrinsics whose element flow we understand.
9498 void
9500 KnownBits &Known,
9501 const APInt &DemandedElts,
9502 const SelectionDAG &DAG,
9503 unsigned Depth) const {
9504 Known.resetAll();
9505
9506 // Intrinsic CC result is returned in the two low bits.
9507 unsigned Tmp0, Tmp1; // not used
9508 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9509 Known.Zero.setBitsFrom(2);
9510 return;
9511 }
9512 EVT VT = Op.getValueType();
9513 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9514 return;
9515 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9516 "KnownBits does not match VT in bitwidth");
9517 assert ((!VT.isVector() ||
9518 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9519 "DemandedElts does not match VT number of elements");
9520 unsigned BitWidth = Known.getBitWidth();
9521 unsigned Opcode = Op.getOpcode();
9522 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9523 bool IsLogical = false;
9524 unsigned Id = Op.getConstantOperandVal(0);
9525 switch (Id) {
// Two-source intrinsics: intersect the known bits of both inputs.
9526 case Intrinsic::s390_vpksh: // PACKS
9527 case Intrinsic::s390_vpksf:
9528 case Intrinsic::s390_vpksg:
9529 case Intrinsic::s390_vpkshs: // PACKS_CC
9530 case Intrinsic::s390_vpksfs:
9531 case Intrinsic::s390_vpksgs:
9532 case Intrinsic::s390_vpklsh: // PACKLS
9533 case Intrinsic::s390_vpklsf:
9534 case Intrinsic::s390_vpklsg:
9535 case Intrinsic::s390_vpklshs: // PACKLS_CC
9536 case Intrinsic::s390_vpklsfs:
9537 case Intrinsic::s390_vpklsgs:
9538 case Intrinsic::s390_vpdi:
9539 case Intrinsic::s390_vsldb:
9540 case Intrinsic::s390_vperm:
9541 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9542 break;
// Unpack intrinsics: known bits of the source, zero- or sign-extended
// to the wider element width depending on the (logical) variant.
9543 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9544 case Intrinsic::s390_vuplhh:
9545 case Intrinsic::s390_vuplhf:
9546 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9547 case Intrinsic::s390_vupllh:
9548 case Intrinsic::s390_vupllf:
9549 IsLogical = true;
9550 [[fallthrough]];
9551 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9552 case Intrinsic::s390_vuphh:
9553 case Intrinsic::s390_vuphf:
9554 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9555 case Intrinsic::s390_vuplhw:
9556 case Intrinsic::s390_vuplf: {
9557 SDValue SrcOp = Op.getOperand(1);
9558 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9559 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9560 if (IsLogical) {
9561 Known = Known.zext(BitWidth);
9562 } else
9563 Known = Known.sext(BitWidth);
9564 break;
9565 }
9566 default:
9567 break;
9568 }
9569 } else {
9570 switch (Opcode) {
9571 case SystemZISD::JOIN_DWORDS:
9572 case SystemZISD::SELECT_CCMASK:
9573 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9574 break;
9575 case SystemZISD::REPLICATE: {
9576 SDValue SrcOp = Op.getOperand(0);
9577 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9579 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9580 break;
9581 }
9582 default:
9583 break;
9584 }
9585 }
9586
9587 // Known has the width of the source operand(s). Adjust if needed to match
9588 // the passed bitwidth.
9589 if (Known.getBitWidth() != BitWidth)
9590 Known = Known.anyextOrTrunc(BitWidth);
9591}
9592
9593static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9594 const SelectionDAG &DAG, unsigned Depth,
9595 unsigned OpNo) {
9596 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9597 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9598 if (LHS == 1) return 1; // Early out.
9599 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9600 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9601 if (RHS == 1) return 1; // Early out.
9602 unsigned Common = std::min(LHS, RHS);
9603 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9604 EVT VT = Op.getValueType();
9605 unsigned VTBits = VT.getScalarSizeInBits();
9606 if (SrcBitWidth > VTBits) { // PACK
9607 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9608 if (Common > SrcExtraBits)
9609 return (Common - SrcExtraBits);
9610 return 1;
9611 }
9612 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9613 return Common;
9614}
9615
// Target hook: number of known sign bits for SystemZ-specific nodes and the
// vector intrinsics below. Returns 1 (no information) for anything else.
9616 unsigned
9618 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9619 unsigned Depth) const {
9620 if (Op.getResNo() != 0)
9621 return 1;
9622 unsigned Opcode = Op.getOpcode();
9623 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9624 unsigned Id = Op.getConstantOperandVal(0);
9625 switch (Id) {
9626 case Intrinsic::s390_vpksh: // PACKS
9627 case Intrinsic::s390_vpksf:
9628 case Intrinsic::s390_vpksg:
9629 case Intrinsic::s390_vpkshs: // PACKS_CC
9630 case Intrinsic::s390_vpksfs:
9631 case Intrinsic::s390_vpksgs:
9632 case Intrinsic::s390_vpklsh: // PACKLS
9633 case Intrinsic::s390_vpklsf:
9634 case Intrinsic::s390_vpklsg:
9635 case Intrinsic::s390_vpklshs: // PACKLS_CC
9636 case Intrinsic::s390_vpklsfs:
9637 case Intrinsic::s390_vpklsgs:
9638 case Intrinsic::s390_vpdi:
9639 case Intrinsic::s390_vsldb:
9640 case Intrinsic::s390_vperm:
9641 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
// Sign-extending unpacks gain one sign bit per bit of widening.
9642 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9643 case Intrinsic::s390_vuphh:
9644 case Intrinsic::s390_vuphf:
9645 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9646 case Intrinsic::s390_vuplhw:
9647 case Intrinsic::s390_vuplf: {
9648 SDValue PackedOp = Op.getOperand(1);
9649 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9650 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9651 EVT VT = Op.getValueType();
9652 unsigned VTBits = VT.getScalarSizeInBits();
9653 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9654 return Tmp;
9655 }
9656 default:
9657 break;
9658 }
9659 } else {
9660 switch (Opcode) {
9661 case SystemZISD::SELECT_CCMASK:
9662 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9663 default:
9664 break;
9665 }
9666 }
9667
9668 return 1;
9669}
9670
// Target hook (isGuaranteedNotToBeUndefOrPoisonForTargetNode): PC-relative
// wrapper/offset nodes never produce undef or poison; everything else is
// conservatively reported as possibly undef/poison.
9673 const APInt &DemandedElts, const SelectionDAG &DAG,
9674 bool PoisonOnly, unsigned Depth) const {
9675 switch (Op->getOpcode()) {
9676 case SystemZISD::PCREL_WRAPPER:
9677 case SystemZISD::PCREL_OFFSET:
9678 return true;
9679 }
9680 return false;
9681}
9682
// Return the stack probe interval for MF: the "stack-probe-size" function
// attribute (default 4096), rounded down to the stack alignment.
9683 unsigned
9685 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9686 unsigned StackAlign = TFI->getStackAlignment();
9687 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9688 "Unexpected stack alignment");
9689 // The default stack probe size is 4096 if the function has no
9690 // stack-probe-size attribute.
9691 unsigned StackProbeSize =
9692 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9693 // Round down to the stack alignment.
9694 StackProbeSize &= ~(StackAlign - 1);
// If rounding down produced zero, fall back to one alignment unit.
9695 return StackProbeSize ? StackProbeSize : StackAlign;
9696}
9697
9698//===----------------------------------------------------------------------===//
9699// Custom insertion
9700//===----------------------------------------------------------------------===//
9701
9702 // Force base value Base into a register before MI. Return the register.
// If Base is already a register it is copied; otherwise (e.g. a frame
// index) an LA (load address) with zero displacement materializes it.
9704 const SystemZInstrInfo *TII) {
9705 MachineBasicBlock *MBB = MI.getParent();
9706 MachineFunction &MF = *MBB->getParent();
9707 MachineRegisterInfo &MRI = MF.getRegInfo();
9708
9709 if (Base.isReg()) {
9710 // Copy Base into a new virtual register to help register coalescing in
9711 // cases with multiple uses.
9712 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9713 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9714 .add(Base);
9715 return Reg;
9716 }
9717
9718 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9719 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9720 .add(Base)
9721 .addImm(0)
9722 .addReg(0)
9723 return Reg;
9724}
9725
9726 // The CC operand of MI might be missing a kill marker because there
9727 // were multiple uses of CC, and ISel didn't know which to mark.
9728 // Figure out whether MI should have had a kill marker.
// Returns true if CC is dead after MI (no later use before a redefinition
// and not live into any successor block).
9730 // Scan forward through BB for a use/def of CC.
9732 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9733 const MachineInstr &MI = *miI;
9734 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9735 return false;
9736 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9737 break; // Should have kill-flag - update below.
9738 }
9739
9740 // If we hit the end of the block, check whether CC is live into a
9741 // successor.
9742 if (miI == MBB->end()) {
9743 for (const MachineBasicBlock *Succ : MBB->successors())
9744 if (Succ->isLiveIn(SystemZ::CC))
9745 return false;
9746 }
9747
9748 return true;
9749}
9750
9751 // Return true if it is OK for this Select pseudo-opcode to be cascaded
9752 // together with other Select pseudo-opcodes into a single basic-block with
9753 // a conditional jump around it.
9755 switch (MI.getOpcode()) {
9756 case SystemZ::Select32:
9757 case SystemZ::Select64:
9758 case SystemZ::Select128:
9759 case SystemZ::SelectF32:
9760 case SystemZ::SelectF64:
9761 case SystemZ::SelectF128:
9762 case SystemZ::SelectVR32:
9763 case SystemZ::SelectVR64:
9764 case SystemZ::SelectVR128:
9765 return true;
9766
9767 default:
9768 return false;
9769 }
9770}
9771
9772 // Helper function, which inserts PHI functions into SinkMBB:
9773 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9774 // where %FalseValue(i) and %TrueValue(i) are taken from Selects.
// All Selects are assumed to share the same CCValid; a Select whose CCMask
// is the inverse of the first one simply gets its PHI inputs swapped.
9776 MachineBasicBlock *TrueMBB,
9777 MachineBasicBlock *FalseMBB,
9778 MachineBasicBlock *SinkMBB) {
9779 MachineFunction *MF = TrueMBB->getParent();
9781
9782 MachineInstr *FirstMI = Selects.front();
9783 unsigned CCValid = FirstMI->getOperand(3).getImm();
9784 unsigned CCMask = FirstMI->getOperand(4).getImm();
9785
9786 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9787
9788 // As we are creating the PHIs, we have to be careful if there is more than
9789 // one. Later Selects may reference the results of earlier Selects, but later
9790 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9791 // That also means that PHI construction must work forward from earlier to
9792 // later, and that the code must maintain a mapping from earlier PHI's
9793 // destination registers, and the registers that went into the PHI.
9795
9796 for (auto *MI : Selects) {
9797 Register DestReg = MI->getOperand(0).getReg();
9798 Register TrueReg = MI->getOperand(1).getReg();
9799 Register FalseReg = MI->getOperand(2).getReg();
9800
9801 // If this Select we are generating is the opposite condition from
9802 // the jump we generated, then we have to swap the operands for the
9803 // PHI that is going to be generated.
9804 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9805 std::swap(TrueReg, FalseReg);
9806
// Rewrite inputs that are results of earlier Selects to the PHI inputs
// recorded for them (see comment above).
9807 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9808 TrueReg = It->second.first;
9809
9810 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9811 FalseReg = It->second.second;
9812
9813 DebugLoc DL = MI->getDebugLoc();
9814 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9815 .addReg(TrueReg).addMBB(TrueMBB)
9816 .addReg(FalseReg).addMBB(FalseMBB);
9817
9818 // Add this PHI to the rewrite table.
9819 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9820 }
9821
9822 MF->getProperties().resetNoPHIs();
9823}
9824
// Custom inserter for ADJSTACKDOWN/ADJSTACKUP: record the call frame size
// and delete the pseudo (the call frame is reserved statically in the
// prolog, so no code is emitted).
9826 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9827 MachineBasicBlock *BB) const {
9828 MachineFunction &MF = *BB->getParent();
9829 MachineFrameInfo &MFI = MF.getFrameInfo();
9830 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9831 assert(TFL->hasReservedCallFrame(MF) &&
9832 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9833 (void)TFL;
9834 // Get the MaxCallFrameSize value and erase MI since it serves no further
9835 // purpose as the call frame is statically reserved in the prolog. Set
9836 // AdjustsStack as MI is *not* mapped as a frame instruction.
9837 uint32_t NumBytes = MI.getOperand(0).getImm();
9838 if (NumBytes > MFI.getMaxCallFrameSize())
9839 MFI.setMaxCallFrameSize(NumBytes);
9840 MFI.setAdjustsStack(true);
9841
9842 MI.eraseFromParent();
9843 return BB;
9844}
9845
9846 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
// Expands one or more cascadable Select* pseudos into a diamond of basic
// blocks (branch around a fallthrough block) joined by PHIs.
9848 SystemZTargetLowering::emitSelect(MachineInstr &MI,
9849 MachineBasicBlock *MBB) const {
9850 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9851 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9852
9853 unsigned CCValid = MI.getOperand(3).getImm();
9854 unsigned CCMask = MI.getOperand(4).getImm();
9855
9856 // If we have a sequence of Select* pseudo instructions using the
9857 // same condition code value, we want to expand all of them into
9858 // a single pair of basic blocks using the same condition.
9859 SmallVector<MachineInstr*, 8> Selects;
9860 SmallVector<MachineInstr*, 8> DbgValues;
9861 Selects.push_back(&MI);
9862 unsigned Count = 0;
9863 for (MachineInstr &NextMI : llvm::make_range(
9864 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9865 if (isSelectPseudo(NextMI)) {
9866 assert(NextMI.getOperand(3).getImm() == CCValid &&
9867 "Bad CCValid operands since CC was not redefined.")LBRACE
9868 if (NextMI.getOperand(4).getImm() == CCMask ||
9869 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9870 Selects.push_back(&NextMI);
9871 continue;
9872 }
9873 break;
9874 }
// Stop if CC is clobbered or the instruction needs its own custom
// insertion - the cascade cannot extend past either.
9875 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9876 NextMI.usesCustomInsertionHook())
9877 break;
9878 bool User = false;
9879 for (auto *SelMI : Selects)
9880 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9881 User = true;
9882 break;
9883 }
9884 if (NextMI.isDebugInstr()) {
9885 if (User) {
9886 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9887 DbgValues.push_back(&NextMI);
9888 }
9889 } else if (User || ++Count > 20)
9890 break;
9891 }
9892
9893 MachineInstr *LastMI = Selects.back();
9894 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9895 checkCCKill(*LastMI, MBB));
9896 MachineBasicBlock *StartMBB = MBB;
9897 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9898 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9899
9900 // Unless CC was killed in the last Select instruction, mark it as
9901 // live-in to both FalseMBB and JoinMBB.
9902 if (!CCKilled) {
9903 FalseMBB->addLiveIn(SystemZ::CC);
9904 JoinMBB->addLiveIn(SystemZ::CC);
9905 }
9906
9907 // StartMBB:
9908 // BRC CCMask, JoinMBB
9909 // # fallthrough to FalseMBB
9910 MBB = StartMBB;
9911 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9912 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9913 MBB->addSuccessor(JoinMBB);
9914 MBB->addSuccessor(FalseMBB);
9915
9916 // FalseMBB:
9917 // # fallthrough to JoinMBB
9918 MBB = FalseMBB;
9919 MBB->addSuccessor(JoinMBB);
9920
9921 // JoinMBB:
9922 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9923 // ...
9924 MBB = JoinMBB;
9925 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9926 for (auto *SelMI : Selects)
9927 SelMI->eraseFromParent();
9928
// Move the collected DBG_VALUE users after the PHIs in the join block.
9930 for (auto *DbgMI : DbgValues)
9931 MBB->splice(InsertPos, StartMBB, DbgMI);
9932
9933 return JoinMBB;
9934}
9935
9936 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9937 // StoreOpcode is the store to use and Invert says whether the store should
9938 // happen when the condition is false rather than true. If a STORE ON
9939 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9940 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9942 unsigned StoreOpcode,
9943 unsigned STOCOpcode,
9944 bool Invert) const {
9945 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9946
9947 Register SrcReg = MI.getOperand(0).getReg();
9948 MachineOperand Base = MI.getOperand(1);
9949 int64_t Disp = MI.getOperand(2).getImm();
9950 Register IndexReg = MI.getOperand(3).getReg();
9951 unsigned CCValid = MI.getOperand(4).getImm();
9952 unsigned CCMask = MI.getOperand(5).getImm();
9953 DebugLoc DL = MI.getDebugLoc();
9954
9955 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9956
9957 // ISel pattern matching also adds a load memory operand of the same
9958 // address, so take special care to find the storing memory operand.
9959 MachineMemOperand *MMO = nullptr;
9960 for (auto *I : MI.memoperands())
9961 if (I->isStore()) {
9962 MMO = I;
9963 break;
9964 }
9965
9966 // Use STOCOpcode if possible. We could use different store patterns in
9967 // order to avoid matching the index register, but the performance trade-offs
9968 // might be more complicated in that case.
9969 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9970 if (Invert)
9971 CCMask ^= CCValid;
9972
9973 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9974 .addReg(SrcReg)
9975 .add(Base)
9976 .addImm(Disp)
9977 .addImm(CCValid)
9978 .addImm(CCMask)
9979 .addMemOperand(MMO);
9980
9981 MI.eraseFromParent();
9982 return MBB;
9983 }
9984
9985 // Get the condition needed to branch around the store.
// Note the inverted sense versus the STOC path: here the branch is taken
// when the store should NOT happen.
9986 if (!Invert)
9987 CCMask ^= CCValid;
9988
9989 MachineBasicBlock *StartMBB = MBB;
9990 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9991 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9992
9993 // Unless CC was killed in the CondStore instruction, mark it as
9994 // live-in to both FalseMBB and JoinMBB.
9995 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9996 !checkCCKill(MI, JoinMBB)) {
9997 FalseMBB->addLiveIn(SystemZ::CC);
9998 JoinMBB->addLiveIn(SystemZ::CC);
9999 }
10000
10001 // StartMBB:
10002 // BRC CCMask, JoinMBB
10003 // # fallthrough to FalseMBB
10004 MBB = StartMBB;
10005 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10006 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10007 MBB->addSuccessor(JoinMBB);
10008 MBB->addSuccessor(FalseMBB);
10009
10010 // FalseMBB:
10011 // store %SrcReg, %Disp(%Index,%Base)
10012 // # fallthrough to JoinMBB
10013 MBB = FalseMBB;
10014 BuildMI(MBB, DL, TII->get(StoreOpcode))
10015 .addReg(SrcReg)
10016 .add(Base)
10017 .addImm(Disp)
10018 .addReg(IndexReg)
10019 .addMemOperand(MMO);
10020 MBB->addSuccessor(JoinMBB);
10021
10022 MI.eraseFromParent();
10023 return JoinMBB;
10024}
10025
10026 // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
// Compares the high halves first; only if they are equal is a second
// (logical) compare of the low halves emitted in a fallthrough block.
10028 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10030 bool Unsigned) const {
10031 MachineFunction &MF = *MBB->getParent();
10032 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10033 MachineRegisterInfo &MRI = MF.getRegInfo();
10034
10035 // Synthetic instruction to compare 128-bit values.
10036 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10037 Register Op0 = MI.getOperand(0).getReg();
10038 Register Op1 = MI.getOperand(1).getReg();
10039
10040 MachineBasicBlock *StartMBB = MBB;
10041 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10042 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10043
10044 // StartMBB:
10045 //
10046 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10047 // Swap the inputs to get:
10048 // CC 1 if high(Op0) > high(Op1)
10049 // CC 2 if high(Op0) < high(Op1)
10050 // CC 0 if high(Op0) == high(Op1)
10051 //
10052 // If CC != 0, we're done, so jump over the next instruction.
10053 //
10054 // VEC[L]G Op1, Op0
10055 // JNE JoinMBB
10056 // # fallthrough to HiEqMBB
10057 MBB = StartMBB;
10058 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10059 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10060 .addReg(Op1).addReg(Op0);
10061 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10063 MBB->addSuccessor(JoinMBB);
10064 MBB->addSuccessor(HiEqMBB);
10065
10066 // HiEqMBB:
10067 //
10068 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10069 // Since we already know the high parts are equal, the CC
10070 // result will only depend on the low parts:
10071 // CC 1 if low(Op0) > low(Op1)
10072 // CC 3 if low(Op0) <= low(Op1)
10073 //
10074 // VCHLGS Tmp, Op0, Op1
10075 // # fallthrough to JoinMBB
10076 MBB = HiEqMBB;
10077 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10078 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10079 .addReg(Op0).addReg(Op1);
10080 MBB->addSuccessor(JoinMBB);
10081
10082 // Mark CC as live-in to JoinMBB.
10083 JoinMBB->addLiveIn(SystemZ::CC);
10084
10085 MI.eraseFromParent();
10086 return JoinMBB;
10087}
10088
10089// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10090// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10091// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10092// whether the field should be inverted after performing BinOpcode (e.g. for
10093// NAND).
// The subword field lives somewhere inside an aligned 32-bit word; BitShift /
// NegBitShift rotate it to/from the top of the word so the operation can be
// performed with 32-bit instructions, and a CS (compare-and-swap) loop
// publishes the result atomically.
// NOTE(review): the BRC operand line near the end (original line 10177) is
// missing from this extracted listing -- confirm against upstream.
10094MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10095 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10096 bool Invert) const {
10097 MachineFunction &MF = *MBB->getParent();
10098 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10099 MachineRegisterInfo &MRI = MF.getRegInfo();
10100
10101 // Extract the operands. Base can be a register or a frame index.
10102 // Src2 can be a register or immediate.
10103 Register Dest = MI.getOperand(0).getReg();
10104 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10105 int64_t Disp = MI.getOperand(2).getImm();
10106 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10107 Register BitShift = MI.getOperand(4).getReg();
10108 Register NegBitShift = MI.getOperand(5).getReg();
10109 unsigned BitSize = MI.getOperand(6).getImm();
10110 DebugLoc DL = MI.getDebugLoc();
10111
10112 // Get the right opcodes for the displacement.
10113 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10114 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10115 assert(LOpcode && CSOpcode && "Displacement out of range");
10116
10117 // Create virtual registers for temporary results.
10118 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10119 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10120 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10121 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10122 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10123
10124 // Insert a basic block for the main loop.
10125 MachineBasicBlock *StartMBB = MBB;
10126 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10127 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10128
10129 // StartMBB:
10130 // ...
10131 // %OrigVal = L Disp(%Base)
10132 // # fall through to LoopMBB
10133 MBB = StartMBB;
10134 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10135 MBB->addSuccessor(LoopMBB);
10136
10137 // LoopMBB:
10138 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10139 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10140 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10141 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10142 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10143 // JNE LoopMBB
10144 // # fall through to DoneMBB
10145 MBB = LoopMBB;
10146 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10147 .addReg(OrigVal).addMBB(StartMBB)
10148 .addReg(Dest).addMBB(LoopMBB);
10149 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10150 .addReg(OldVal).addReg(BitShift).addImm(0);
10151 if (Invert) {
10152 // Perform the operation normally and then invert every bit of the field.
10153 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10154 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10155 // XILF with the upper BitSize bits set.
// -1U << (32 - BitSize) yields a mask whose top BitSize bits are 1, so only
// the rotated field is inverted; the rest of the word is left intact.
10156 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10157 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10158 } else if (BinOpcode)
10159 // A simple binary operation.
10160 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10161 .addReg(RotatedOldVal)
10162 .add(Src2);
10163 else
10164 // Use RISBG to rotate Src2 into position and use it to replace the
10165 // field in RotatedOldVal.
10166 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10167 .addReg(RotatedOldVal).addReg(Src2.getReg())
10168 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10169 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10170 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10171 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10172 .addReg(OldVal)
10173 .addReg(NewVal)
10174 .add(Base)
10175 .addImm(Disp);
10176 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
// NOTE(review): original line 10177 (the BRC mask/target operands -- the
// "JNE LoopMBB" retry branch per the block comment) is missing here.
10178 MBB->addSuccessor(LoopMBB);
10179 MBB->addSuccessor(DoneMBB);
10180
10181 MI.eraseFromParent();
10182 return DoneMBB;
10183}
10184
10185// Implement EmitInstrWithCustomInserter for subword pseudo
10186// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10187// instruction that should be used to compare the current field with the
10188// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10189// for when the current field should be kept.
// Same rotate/compare-and-swap structure as emitAtomicLoadBinary, with an
// extra diamond: LoopMBB compares and either keeps the old field (branch to
// UpdateMBB) or substitutes Src2 via RISBG in UseAltMBB.
// NOTE(review): the final BRC operand line (original line 10281) is missing
// from this extracted listing -- confirm against upstream.
10190MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10191 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10192 unsigned KeepOldMask) const {
10193 MachineFunction &MF = *MBB->getParent();
10194 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10195 MachineRegisterInfo &MRI = MF.getRegInfo();
10196
10197 // Extract the operands. Base can be a register or a frame index.
10198 Register Dest = MI.getOperand(0).getReg();
10199 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10200 int64_t Disp = MI.getOperand(2).getImm();
10201 Register Src2 = MI.getOperand(3).getReg();
10202 Register BitShift = MI.getOperand(4).getReg();
10203 Register NegBitShift = MI.getOperand(5).getReg();
10204 unsigned BitSize = MI.getOperand(6).getImm();
10205 DebugLoc DL = MI.getDebugLoc();
10206
10207 // Get the right opcodes for the displacement.
10208 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10209 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10210 assert(LOpcode && CSOpcode && "Displacement out of range");
10211
10212 // Create virtual registers for temporary results.
10213 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10214 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10215 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10216 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10217 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10218 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10219
10220 // Insert 3 basic blocks for the loop.
10221 MachineBasicBlock *StartMBB = MBB;
10222 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10223 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10224 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10225 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10226
10227 // StartMBB:
10228 // ...
10229 // %OrigVal = L Disp(%Base)
10230 // # fall through to LoopMBB
10231 MBB = StartMBB;
10232 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10233 MBB->addSuccessor(LoopMBB);
10234
10235 // LoopMBB:
10236 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10237 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10238 // CompareOpcode %RotatedOldVal, %Src2
10239 // BRC KeepOldMask, UpdateMBB
10240 MBB = LoopMBB;
10241 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10242 .addReg(OrigVal).addMBB(StartMBB)
10243 .addReg(Dest).addMBB(UpdateMBB);
10244 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10245 .addReg(OldVal).addReg(BitShift).addImm(0);
10246 BuildMI(MBB, DL, TII->get(CompareOpcode))
10247 .addReg(RotatedOldVal).addReg(Src2);
10248 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10249 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10250 MBB->addSuccessor(UpdateMBB);
10251 MBB->addSuccessor(UseAltMBB);
10252
10253 // UseAltMBB:
10254 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10255 // # fall through to UpdateMBB
10256 MBB = UseAltMBB;
10257 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10258 .addReg(RotatedOldVal).addReg(Src2)
10259 .addImm(32).addImm(31 + BitSize).addImm(0);
10260 MBB->addSuccessor(UpdateMBB);
10261
10262 // UpdateMBB:
10263 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10264 // [ %RotatedAltVal, UseAltMBB ]
10265 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10266 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10267 // JNE LoopMBB
10268 // # fall through to DoneMBB
10269 MBB = UpdateMBB;
10270 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10271 .addReg(RotatedOldVal).addMBB(LoopMBB)
10272 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10273 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10274 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10275 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10276 .addReg(OldVal)
10277 .addReg(NewVal)
10278 .add(Base)
10279 .addImm(Disp);
10280 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
// NOTE(review): original line 10281 (the BRC mask/target operands -- the
// "JNE LoopMBB" retry branch per the block comment) is missing here.
10282 MBB->addSuccessor(LoopMBB);
10283 MBB->addSuccessor(DoneMBB);
10284
10285 MI.eraseFromParent();
10286 return DoneMBB;
10287}
10288
10289// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10290// instruction MI.
// Rotates the subword field to the bottom of the loaded word, zero-extends it
// for the comparison with CmpVal, and on success rotates the swap value back
// into position and publishes it with a CS loop.
// NOTE(review): this extracted listing dropped a few hyperlinked lines -- the
// return type line of the signature and two BRC operand lists below. Confirm
// against upstream.
10292SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10293 MachineBasicBlock *MBB) const {
10294 MachineFunction &MF = *MBB->getParent();
10295 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10296 MachineRegisterInfo &MRI = MF.getRegInfo();
10297
10298 // Extract the operands. Base can be a register or a frame index.
10299 Register Dest = MI.getOperand(0).getReg();
10300 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10301 int64_t Disp = MI.getOperand(2).getImm();
10302 Register CmpVal = MI.getOperand(3).getReg();
10303 Register OrigSwapVal = MI.getOperand(4).getReg();
10304 Register BitShift = MI.getOperand(5).getReg();
10305 Register NegBitShift = MI.getOperand(6).getReg();
10306 int64_t BitSize = MI.getOperand(7).getImm();
10307 DebugLoc DL = MI.getDebugLoc();
10308
10309 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10310
10311 // Get the right opcodes for the displacement and zero-extension.
10312 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10313 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10314 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10315 assert(LOpcode && CSOpcode && "Displacement out of range");
10316
10317 // Create virtual registers for temporary results.
10318 Register OrigOldVal = MRI.createVirtualRegister(RC);
10319 Register OldVal = MRI.createVirtualRegister(RC);
10320 Register SwapVal = MRI.createVirtualRegister(RC);
10321 Register StoreVal = MRI.createVirtualRegister(RC);
10322 Register OldValRot = MRI.createVirtualRegister(RC);
10323 Register RetryOldVal = MRI.createVirtualRegister(RC);
10324 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10325
10326 // Insert 2 basic blocks for the loop.
10327 MachineBasicBlock *StartMBB = MBB;
10328 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10329 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10330 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10331
10332 // StartMBB:
10333 // ...
10334 // %OrigOldVal = L Disp(%Base)
10335 // # fall through to LoopMBB
10336 MBB = StartMBB;
10337 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10338 .add(Base)
10339 .addImm(Disp)
10340 .addReg(0);
10341 MBB->addSuccessor(LoopMBB);
10342
10343 // LoopMBB:
10344 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10345 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10346 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10347 // ^^ The low BitSize bits contain the field
10348 // of interest.
10349 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10350 // ^^ Replace the upper 32-BitSize bits of the
10351 // swap value with those that we loaded and rotated.
10352 // %Dest = LL[CH] %OldValRot
10353 // CR %Dest, %CmpVal
10354 // JNE DoneMBB
10355 // # Fall through to SetMBB
10356 MBB = LoopMBB;
10357 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10358 .addReg(OrigOldVal).addMBB(StartMBB)
10359 .addReg(RetryOldVal).addMBB(SetMBB);
10360 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10361 .addReg(OrigSwapVal).addMBB(StartMBB)
10362 .addReg(RetrySwapVal).addMBB(SetMBB);
10363 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10364 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10365 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10366 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10367 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10368 .addReg(OldValRot);
10369 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10370 .addReg(Dest).addReg(CmpVal);
10371 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
// NOTE(review): original lines 10372-10373 (the BRC mask/target operands --
// the "JNE DoneMBB" branch per the block comment) are missing here.
10374 MBB->addSuccessor(DoneMBB);
10375 MBB->addSuccessor(SetMBB);
10376
10377 // SetMBB:
10378 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10379 // ^^ Rotate the new field to its proper position.
10380 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10381 // JNE LoopMBB
10382 // # fall through to ExitMBB
10383 MBB = SetMBB;
10384 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10385 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10386 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10387 .addReg(OldVal)
10388 .addReg(StoreVal)
10389 .add(Base)
10390 .addImm(Disp);
10391 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
// NOTE(review): original line 10392 (the BRC operands for the "JNE LoopMBB"
// retry branch) is missing here.
10393 MBB->addSuccessor(LoopMBB);
10394 MBB->addSuccessor(DoneMBB);
10395
10396 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10397 // to the block after the loop. At this point, CC may have been defined
10398 // either by the CR in LoopMBB or by the CS in SetMBB.
10399 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10400 DoneMBB->addLiveIn(SystemZ::CC);
10401
10402 MI.eraseFromParent();
10403 return DoneMBB;
10404}
10405
10406// Emit a move from two GR64s to a GR128.
10408SystemZTargetLowering::emitPair128(MachineInstr &MI,
10409 MachineBasicBlock *MBB) const {
10410 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10411 const DebugLoc &DL = MI.getDebugLoc();
10412
10413 Register Dest = MI.getOperand(0).getReg();
10414 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10415 .add(MI.getOperand(1))
10416 .addImm(SystemZ::subreg_h64)
10417 .add(MI.getOperand(2))
10418 .addImm(SystemZ::subreg_l64);
10419 MI.eraseFromParent();
10420 return MBB;
10421}
10422
10423// Emit an extension from a GR64 to a GR128. ClearEven is true
10424// if the high register of the GR128 value must be cleared or false if
10425// it's "don't care".
10426MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10428 bool ClearEven) const {
10429 MachineFunction &MF = *MBB->getParent();
10430 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10431 MachineRegisterInfo &MRI = MF.getRegInfo();
10432 DebugLoc DL = MI.getDebugLoc();
10433
10434 Register Dest = MI.getOperand(0).getReg();
10435 Register Src = MI.getOperand(1).getReg();
10436 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10437
10438 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10439 if (ClearEven) {
10440 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10441 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10442
10443 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10444 .addImm(0);
10445 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10446 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10447 In128 = NewIn128;
10448 }
10449 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10450 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10451
10452 MI.eraseFromParent();
10453 return MBB;
10454}
10455
// Expand a memory-to-memory pseudo (e.g. MVC or CLC as Opcode, or a memset
// form when IsMemset) into straight-line 256-byte chunks, an optional loop
// for long/register-specified lengths, and an EXRL-based remainder.
// NOTE(review): this listing was extracted from a rendered page and dropped
// several hyperlinked lines -- the return type / MBB parameter of the
// signature, the insertion-position parameter of insertMemMemOp, a Base
// rewrite inside foldDisplIfNeeded, the true-branch of the EndMBB ternary,
// and the operand lists of several BRC/PFD instructions. Confirm against
// the upstream file before relying on this text.
10457SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10459 unsigned Opcode, bool IsMemset) const {
10460 MachineFunction &MF = *MBB->getParent();
10461 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10462 MachineRegisterInfo &MRI = MF.getRegInfo();
10463 DebugLoc DL = MI.getDebugLoc();
10464
10465 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10466 uint64_t DestDisp = MI.getOperand(1).getImm();
10467 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10468 uint64_t SrcDisp;
10469
10470 // Fold the displacement Disp if it is out of range.
10471 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10472 if (!isUInt<12>(Disp)) {
10473 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10474 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10475 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10476 .add(Base).addImm(Disp).addReg(0);
// NOTE(review): original line 10477 is missing from this listing --
// presumably it rebases Base onto the freshly computed Reg. Verify upstream.
10478 Disp = 0;
10479 }
10480 };
10481
10482 if (!IsMemset) {
10483 SrcBase = earlyUseOperand(MI.getOperand(2));
10484 SrcDisp = MI.getOperand(3).getImm();
10485 } else {
10486 SrcBase = DestBase;
10487 SrcDisp = DestDisp++;
10488 foldDisplIfNeeded(DestBase, DestDisp);
10489 }
10490
10491 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10492 bool IsImmForm = LengthMO.isImm();
10493 bool IsRegForm = !IsImmForm;
10494
10495 // Build and insert one Opcode of Length, with special treatment for memset.
10496 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
// NOTE(review): original line 10497 (the insertion-position parameter,
// referenced below as InsPos) is missing from this listing.
10498 MachineOperand DBase, uint64_t DDisp,
10499 MachineOperand SBase, uint64_t SDisp,
10500 unsigned Length) -> void {
10501 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10502 if (IsMemset) {
10503 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10504 if (ByteMO.isImm())
10505 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10506 .add(SBase).addImm(SDisp).add(ByteMO);
10507 else
10508 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10509 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10510 if (--Length == 0)
10511 return;
10512 }
10513 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10514 .add(DBase).addImm(DDisp).addImm(Length)
10515 .add(SBase).addImm(SDisp)
10516 .setMemRefs(MI.memoperands());
10517 };
10518
10519 bool NeedsLoop = false;
10520 uint64_t ImmLength = 0;
10521 Register LenAdjReg = SystemZ::NoRegister;
10522 if (IsImmForm) {
10523 ImmLength = LengthMO.getImm();
10524 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10525 if (ImmLength == 0) {
10526 MI.eraseFromParent();
10527 return MBB;
10528 }
10529 if (Opcode == SystemZ::CLC) {
10530 if (ImmLength > 3 * 256)
10531 // A two-CLC sequence is a clear win over a loop, not least because
10532 // it needs only one branch. A three-CLC sequence needs the same
10533 // number of branches as a loop (i.e. 2), but is shorter. That
10534 // brings us to lengths greater than 768 bytes. It seems relatively
10535 // likely that a difference will be found within the first 768 bytes,
10536 // so we just optimize for the smallest number of branch
10537 // instructions, in order to avoid polluting the prediction buffer
10538 // too much.
10539 NeedsLoop = true;
10540 } else if (ImmLength > 6 * 256)
10541 // The heuristic we use is to prefer loops for anything that would
10542 // require 7 or more MVCs. With these kinds of sizes there isn't much
10543 // to choose between straight-line code and looping code, since the
10544 // time will be dominated by the MVCs themselves.
10545 NeedsLoop = true;
10546 } else {
10547 NeedsLoop = true;
10548 LenAdjReg = LengthMO.getReg();
10549 }
10550
10551 // When generating more than one CLC, all but the last will need to
10552 // branch to the end when a difference is found.
10553 MachineBasicBlock *EndMBB =
10554 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10556 : nullptr);
10557
10558 if (NeedsLoop) {
10559 Register StartCountReg =
10560 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10561 if (IsImmForm) {
10562 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10563 ImmLength &= 255;
10564 } else {
10565 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10566 .addReg(LenAdjReg)
10567 .addReg(0)
10568 .addImm(8);
10569 }
10570
10571 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10572 auto loadZeroAddress = [&]() -> MachineOperand {
10573 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10574 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10575 return MachineOperand::CreateReg(Reg, false);
10576 };
10577 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10578 DestBase = loadZeroAddress();
10579 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10580 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10581
10582 MachineBasicBlock *StartMBB = nullptr;
10583 MachineBasicBlock *LoopMBB = nullptr;
10584 MachineBasicBlock *NextMBB = nullptr;
10585 MachineBasicBlock *DoneMBB = nullptr;
10586 MachineBasicBlock *AllDoneMBB = nullptr;
10587
10588 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10589 Register StartDestReg =
10590 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10591
10592 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10593 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10594 Register ThisDestReg =
10595 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10596 Register NextSrcReg = MRI.createVirtualRegister(RC);
10597 Register NextDestReg =
10598 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10599 RC = &SystemZ::GR64BitRegClass;
10600 Register ThisCountReg = MRI.createVirtualRegister(RC);
10601 Register NextCountReg = MRI.createVirtualRegister(RC);
10602
10603 if (IsRegForm) {
10604 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10605 StartMBB = SystemZ::emitBlockAfter(MBB);
10606 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10607 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10608 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10609
10610 // MBB:
10611 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10612 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10613 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10614 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10616 .addMBB(AllDoneMBB);
10617 MBB->addSuccessor(AllDoneMBB);
10618 if (!IsMemset)
10619 MBB->addSuccessor(StartMBB);
10620 else {
10621 // MemsetOneCheckMBB:
10622 // # Jump to MemsetOneMBB for a memset of length 1, or
10623 // # fall thru to StartMBB.
10624 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10625 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10626 MBB->addSuccessor(MemsetOneCheckMBB);
10627 MBB = MemsetOneCheckMBB;
10628 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10629 .addReg(LenAdjReg).addImm(-1);
10630 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10632 .addMBB(MemsetOneMBB);
10633 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10634 MBB->addSuccessor(StartMBB, {90, 100});
10635
10636 // MemsetOneMBB:
10637 // # Jump back to AllDoneMBB after a single MVI or STC.
10638 MBB = MemsetOneMBB;
10639 insertMemMemOp(MBB, MBB->end(),
10640 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10641 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10642 1);
10643 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10644 MBB->addSuccessor(AllDoneMBB);
10645 }
10646
10647 // StartMBB:
10648 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10649 MBB = StartMBB;
10650 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10651 .addReg(StartCountReg).addImm(0);
10652 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10654 .addMBB(DoneMBB);
10655 MBB->addSuccessor(DoneMBB);
10656 MBB->addSuccessor(LoopMBB);
10657 }
10658 else {
10659 StartMBB = MBB;
10660 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10661 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10662 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10663
10664 // StartMBB:
10665 // # fall through to LoopMBB
10666 MBB->addSuccessor(LoopMBB);
10667
10668 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10669 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10670 if (EndMBB && !ImmLength)
10671 // If the loop handled the whole CLC range, DoneMBB will be empty with
10672 // CC live-through into EndMBB, so add it as live-in.
10673 DoneMBB->addLiveIn(SystemZ::CC);
10674 }
10675
10676 // LoopMBB:
10677 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10678 // [ %NextDestReg, NextMBB ]
10679 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10680 // [ %NextSrcReg, NextMBB ]
10681 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10682 // [ %NextCountReg, NextMBB ]
10683 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10684 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10685 // ( JLH EndMBB )
10686 //
10687 // The prefetch is used only for MVC. The JLH is used only for CLC.
10688 MBB = LoopMBB;
10689 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10690 .addReg(StartDestReg).addMBB(StartMBB)
10691 .addReg(NextDestReg).addMBB(NextMBB);
10692 if (!HaveSingleBase)
10693 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10694 .addReg(StartSrcReg).addMBB(StartMBB)
10695 .addReg(NextSrcReg).addMBB(NextMBB);
10696 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10697 .addReg(StartCountReg).addMBB(StartMBB)
10698 .addReg(NextCountReg).addMBB(NextMBB);
10699 if (Opcode == SystemZ::MVC)
10700 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10702 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10703 insertMemMemOp(MBB, MBB->end(),
10704 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10705 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10706 if (EndMBB) {
10707 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10709 .addMBB(EndMBB);
10710 MBB->addSuccessor(EndMBB);
10711 MBB->addSuccessor(NextMBB);
10712 }
10713
10714 // NextMBB:
10715 // %NextDestReg = LA 256(%ThisDestReg)
10716 // %NextSrcReg = LA 256(%ThisSrcReg)
10717 // %NextCountReg = AGHI %ThisCountReg, -1
10718 // CGHI %NextCountReg, 0
10719 // JLH LoopMBB
10720 // # fall through to DoneMBB
10721 //
10722 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10723 MBB = NextMBB;
10724 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10725 .addReg(ThisDestReg).addImm(256).addReg(0);
10726 if (!HaveSingleBase)
10727 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10728 .addReg(ThisSrcReg).addImm(256).addReg(0);
10729 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10730 .addReg(ThisCountReg).addImm(-1);
10731 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10732 .addReg(NextCountReg).addImm(0);
10733 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10735 .addMBB(LoopMBB);
10736 MBB->addSuccessor(LoopMBB);
10737 MBB->addSuccessor(DoneMBB);
10738
10739 MBB = DoneMBB;
10740 if (IsRegForm) {
10741 // DoneMBB:
10742 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10743 // # Use EXecute Relative Long for the remainder of the bytes. The target
10744 // instruction of the EXRL will have a length field of 1 since 0 is an
10745 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10746 // 0xff) + 1.
10747 // # Fall through to AllDoneMBB.
10748 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10749 Register RemDestReg = HaveSingleBase ? RemSrcReg
10750 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10751 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10752 .addReg(StartDestReg).addMBB(StartMBB)
10753 .addReg(NextDestReg).addMBB(NextMBB);
10754 if (!HaveSingleBase)
10755 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10756 .addReg(StartSrcReg).addMBB(StartMBB)
10757 .addReg(NextSrcReg).addMBB(NextMBB);
10758 if (IsMemset)
10759 insertMemMemOp(MBB, MBB->end(),
10760 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10761 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10762 MachineInstrBuilder EXRL_MIB =
10763 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10764 .addImm(Opcode)
10765 .addReg(LenAdjReg)
10766 .addReg(RemDestReg).addImm(DestDisp)
10767 .addReg(RemSrcReg).addImm(SrcDisp);
10768 MBB->addSuccessor(AllDoneMBB);
10769 MBB = AllDoneMBB;
10770 if (Opcode != SystemZ::MVC) {
10771 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10772 if (EndMBB)
10773 MBB->addLiveIn(SystemZ::CC);
10774 }
10775 }
10776 MF.getProperties().resetNoPHIs();
10777 }
10778
10779 // Handle any remaining bytes with straight-line code.
10780 while (ImmLength > 0) {
10781 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10782 // The previous iteration might have created out-of-range displacements.
10783 // Apply them using LA/LAY if so.
10784 foldDisplIfNeeded(DestBase, DestDisp);
10785 foldDisplIfNeeded(SrcBase, SrcDisp);
10786 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10787 DestDisp += ThisLength;
10788 SrcDisp += ThisLength;
10789 ImmLength -= ThisLength;
10790 // If there's another CLC to go, branch to the end if a difference
10791 // was found.
10792 if (EndMBB && ImmLength > 0) {
10793 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10794 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10796 .addMBB(EndMBB);
10797 MBB->addSuccessor(EndMBB);
10798 MBB->addSuccessor(NextMBB);
10799 MBB = NextMBB;
10800 }
10801 }
10802 if (EndMBB) {
10803 MBB->addSuccessor(EndMBB);
10804 MBB = EndMBB;
10805 MBB->addLiveIn(SystemZ::CC);
10806 }
10807
10808 MI.eraseFromParent();
10809 return MBB;
10810}
10811
10812// Decompose string pseudo-instruction MI into a loop that continually performs
10813// Opcode until CC != 3.
// CharReg is copied into R0L each iteration (the string instructions take
// their terminator/pad byte implicitly in R0L); CC == 3 means "CPU-determined
// amount processed, not finished", so the loop repeats on that condition.
// NOTE(review): the BRC operand line near the end (original line 10861) is
// missing from this extracted listing -- confirm against upstream. Also note
// the register variables below are held in uint64_t rather than Register.
10814MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10815 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10816 MachineFunction &MF = *MBB->getParent();
10817 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10818 MachineRegisterInfo &MRI = MF.getRegInfo();
10819 DebugLoc DL = MI.getDebugLoc();
10820
10821 uint64_t End1Reg = MI.getOperand(0).getReg();
10822 uint64_t Start1Reg = MI.getOperand(1).getReg();
10823 uint64_t Start2Reg = MI.getOperand(2).getReg();
10824 uint64_t CharReg = MI.getOperand(3).getReg();
10825
10826 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10827 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10828 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10829 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10830
10831 MachineBasicBlock *StartMBB = MBB;
10832 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10833 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10834
10835 // StartMBB:
10836 // # fall through to LoopMBB
10837 MBB->addSuccessor(LoopMBB);
10838
10839 // LoopMBB:
10840 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10841 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10842 // R0L = %CharReg
10843 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10844 // JO LoopMBB
10845 // # fall through to DoneMBB
10846 //
10847 // The load of R0L can be hoisted by post-RA LICM.
10848 MBB = LoopMBB;
10849
10850 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10851 .addReg(Start1Reg).addMBB(StartMBB)
10852 .addReg(End1Reg).addMBB(LoopMBB);
10853 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10854 .addReg(Start2Reg).addMBB(StartMBB)
10855 .addReg(End2Reg).addMBB(LoopMBB);
10856 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10857 BuildMI(MBB, DL, TII->get(Opcode))
10858 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10859 .addReg(This1Reg).addReg(This2Reg);
10860 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
// NOTE(review): original line 10861 (the BRC operands for the "JO LoopMBB"
// repeat branch per the block comment) is missing here.
10862 MBB->addSuccessor(LoopMBB);
10863 MBB->addSuccessor(DoneMBB);
10864
10865 DoneMBB->addLiveIn(SystemZ::CC);
10866
10867 MI.eraseFromParent();
10868 return DoneMBB;
10869}
10870
10871// Update TBEGIN instruction with final opcode and register clobbers.
10872MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10873 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10874 bool NoFloat) const {
10875 MachineFunction &MF = *MBB->getParent();
10876 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10877 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10878
10879 // Update opcode.
10880 MI.setDesc(TII->get(Opcode));
10881
10882 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10883 // Make sure to add the corresponding GRSM bits if they are missing.
10884 uint64_t Control = MI.getOperand(2).getImm();
10885 static const unsigned GPRControlBit[16] = {
10886 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10887 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10888 };
10889 Control |= GPRControlBit[15];
10890 if (TFI->hasFP(MF))
10891 Control |= GPRControlBit[11];
10892 MI.getOperand(2).setImm(Control);
10893
10894 // Add GPR clobbers.
10895 for (int I = 0; I < 16; I++) {
10896 if ((Control & GPRControlBit[I]) == 0) {
10897 unsigned Reg = SystemZMC::GR64Regs[I];
10898 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10899 }
10900 }
10901
10902 // Add FPR/VR clobbers.
10903 if (!NoFloat && (Control & 4) != 0) {
10904 if (Subtarget.hasVector()) {
10905 for (unsigned Reg : SystemZMC::VR128Regs) {
10906 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10907 }
10908 } else {
10909 for (unsigned Reg : SystemZMC::FP64Regs) {
10910 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10911 }
10912 }
10913 }
10914
10915 return MBB;
10916}
10917
10918MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10919 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10920 MachineFunction &MF = *MBB->getParent();
10921 MachineRegisterInfo *MRI = &MF.getRegInfo();
10922 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10923 DebugLoc DL = MI.getDebugLoc();
10924
10925 Register SrcReg = MI.getOperand(0).getReg();
10926
10927 // Create new virtual register of the same class as source.
10928 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10929 Register DstReg = MRI->createVirtualRegister(RC);
10930
10931 // Replace pseudo with a normal load-and-test that models the def as
10932 // well.
10933 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10934 .addReg(SrcReg)
10935 .setMIFlags(MI.getFlags());
10936 MI.eraseFromParent();
10937
10938 return MBB;
10939}
10940
10941MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10943 MachineFunction &MF = *MBB->getParent();
10944 MachineRegisterInfo *MRI = &MF.getRegInfo();
10945 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10946 DebugLoc DL = MI.getDebugLoc();
10947 const unsigned ProbeSize = getStackProbeSize(MF);
10948 Register DstReg = MI.getOperand(0).getReg();
10949 Register SizeReg = MI.getOperand(2).getReg();
10950
10951 MachineBasicBlock *StartMBB = MBB;
10952 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10953 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10954 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10955 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10956 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10957
10958 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10960
10961 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10962 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10963
10964 // LoopTestMBB
10965 // BRC TailTestMBB
10966 // # fallthrough to LoopBodyMBB
10967 StartMBB->addSuccessor(LoopTestMBB);
10968 MBB = LoopTestMBB;
10969 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10970 .addReg(SizeReg)
10971 .addMBB(StartMBB)
10972 .addReg(IncReg)
10973 .addMBB(LoopBodyMBB);
10974 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10975 .addReg(PHIReg)
10976 .addImm(ProbeSize);
10977 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10979 .addMBB(TailTestMBB);
10980 MBB->addSuccessor(LoopBodyMBB);
10981 MBB->addSuccessor(TailTestMBB);
10982
10983 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10984 // J LoopTestMBB
10985 MBB = LoopBodyMBB;
10986 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10987 .addReg(PHIReg)
10988 .addImm(ProbeSize);
10989 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10990 .addReg(SystemZ::R15D)
10991 .addImm(ProbeSize);
10992 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10993 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10994 .setMemRefs(VolLdMMO);
10995 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10996 MBB->addSuccessor(LoopTestMBB);
10997
10998 // TailTestMBB
10999 // BRC DoneMBB
11000 // # fallthrough to TailMBB
11001 MBB = TailTestMBB;
11002 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11003 .addReg(PHIReg)
11004 .addImm(0);
11005 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11007 .addMBB(DoneMBB);
11008 MBB->addSuccessor(TailMBB);
11009 MBB->addSuccessor(DoneMBB);
11010
11011 // TailMBB
11012 // # fallthrough to DoneMBB
11013 MBB = TailMBB;
11014 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11015 .addReg(SystemZ::R15D)
11016 .addReg(PHIReg);
11017 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11018 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11019 .setMemRefs(VolLdMMO);
11020 MBB->addSuccessor(DoneMBB);
11021
11022 // DoneMBB
11023 MBB = DoneMBB;
11024 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11025 .addReg(SystemZ::R15D);
11026
11027 MI.eraseFromParent();
11028 return DoneMBB;
11029}
11030
11031SDValue SystemZTargetLowering::
11032getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11033 MachineFunction &MF = DAG.getMachineFunction();
11034 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11035 SDLoc DL(SP);
11036 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11037 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11038}
11039
11042 switch (MI.getOpcode()) {
11043 case SystemZ::ADJCALLSTACKDOWN:
11044 case SystemZ::ADJCALLSTACKUP:
11045 return emitAdjCallStack(MI, MBB);
11046
11047 case SystemZ::Select32:
11048 case SystemZ::Select64:
11049 case SystemZ::Select128:
11050 case SystemZ::SelectF32:
11051 case SystemZ::SelectF64:
11052 case SystemZ::SelectF128:
11053 case SystemZ::SelectVR32:
11054 case SystemZ::SelectVR64:
11055 case SystemZ::SelectVR128:
11056 return emitSelect(MI, MBB);
11057
11058 case SystemZ::CondStore8Mux:
11059 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11060 case SystemZ::CondStore8MuxInv:
11061 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11062 case SystemZ::CondStore16Mux:
11063 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11064 case SystemZ::CondStore16MuxInv:
11065 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11066 case SystemZ::CondStore32Mux:
11067 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11068 case SystemZ::CondStore32MuxInv:
11069 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11070 case SystemZ::CondStore8:
11071 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11072 case SystemZ::CondStore8Inv:
11073 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11074 case SystemZ::CondStore16:
11075 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11076 case SystemZ::CondStore16Inv:
11077 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11078 case SystemZ::CondStore32:
11079 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11080 case SystemZ::CondStore32Inv:
11081 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11082 case SystemZ::CondStore64:
11083 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11084 case SystemZ::CondStore64Inv:
11085 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11086 case SystemZ::CondStoreF32:
11087 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11088 case SystemZ::CondStoreF32Inv:
11089 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11090 case SystemZ::CondStoreF64:
11091 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11092 case SystemZ::CondStoreF64Inv:
11093 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11094
11095 case SystemZ::SCmp128Hi:
11096 return emitICmp128Hi(MI, MBB, false);
11097 case SystemZ::UCmp128Hi:
11098 return emitICmp128Hi(MI, MBB, true);
11099
11100 case SystemZ::PAIR128:
11101 return emitPair128(MI, MBB);
11102 case SystemZ::AEXT128:
11103 return emitExt128(MI, MBB, false);
11104 case SystemZ::ZEXT128:
11105 return emitExt128(MI, MBB, true);
11106
11107 case SystemZ::ATOMIC_SWAPW:
11108 return emitAtomicLoadBinary(MI, MBB, 0);
11109
11110 case SystemZ::ATOMIC_LOADW_AR:
11111 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11112 case SystemZ::ATOMIC_LOADW_AFI:
11113 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11114
11115 case SystemZ::ATOMIC_LOADW_SR:
11116 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11117
11118 case SystemZ::ATOMIC_LOADW_NR:
11119 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11120 case SystemZ::ATOMIC_LOADW_NILH:
11121 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11122
11123 case SystemZ::ATOMIC_LOADW_OR:
11124 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11125 case SystemZ::ATOMIC_LOADW_OILH:
11126 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11127
11128 case SystemZ::ATOMIC_LOADW_XR:
11129 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11130 case SystemZ::ATOMIC_LOADW_XILF:
11131 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11132
11133 case SystemZ::ATOMIC_LOADW_NRi:
11134 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11135 case SystemZ::ATOMIC_LOADW_NILHi:
11136 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11137
11138 case SystemZ::ATOMIC_LOADW_MIN:
11139 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11140 case SystemZ::ATOMIC_LOADW_MAX:
11141 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11142 case SystemZ::ATOMIC_LOADW_UMIN:
11143 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11144 case SystemZ::ATOMIC_LOADW_UMAX:
11145 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11146
11147 case SystemZ::ATOMIC_CMP_SWAPW:
11148 return emitAtomicCmpSwapW(MI, MBB);
11149 case SystemZ::MVCImm:
11150 case SystemZ::MVCReg:
11151 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11152 case SystemZ::NCImm:
11153 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11154 case SystemZ::OCImm:
11155 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11156 case SystemZ::XCImm:
11157 case SystemZ::XCReg:
11158 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11159 case SystemZ::CLCImm:
11160 case SystemZ::CLCReg:
11161 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11162 case SystemZ::MemsetImmImm:
11163 case SystemZ::MemsetImmReg:
11164 case SystemZ::MemsetRegImm:
11165 case SystemZ::MemsetRegReg:
11166 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11167 case SystemZ::CLSTLoop:
11168 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11169 case SystemZ::MVSTLoop:
11170 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11171 case SystemZ::SRSTLoop:
11172 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11173 case SystemZ::TBEGIN:
11174 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11175 case SystemZ::TBEGIN_nofloat:
11176 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11177 case SystemZ::TBEGINC:
11178 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11179 case SystemZ::LTEBRCompare_Pseudo:
11180 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11181 case SystemZ::LTDBRCompare_Pseudo:
11182 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11183 case SystemZ::LTXBRCompare_Pseudo:
11184 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11185
11186 case SystemZ::PROBED_ALLOCA:
11187 return emitProbedAlloca(MI, MBB);
11188 case SystemZ::EH_SjLj_SetJmp:
11189 return emitEHSjLjSetJmp(MI, MBB);
11190 case SystemZ::EH_SjLj_LongJmp:
11191 return emitEHSjLjLongJmp(MI, MBB);
11192
11193 case TargetOpcode::STACKMAP:
11194 case TargetOpcode::PATCHPOINT:
11195 return emitPatchPoint(MI, MBB);
11196
11197 default:
11198 llvm_unreachable("Unexpected instr type to insert");
11199 }
11200}
11201
11202// This is only used by the isel schedulers, and is needed only to prevent
11203// compiler from crashing when list-ilp is used.
11204const TargetRegisterClass *
11205SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11206 if (VT == MVT::Untyped)
11207 return &SystemZ::ADDR128BitRegClass;
11209}
11210
11211SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11212 SelectionDAG &DAG) const {
11213 SDLoc dl(Op);
11214 /*
11215 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11216 settings:
11217 00 Round to nearest
11218 01 Round to 0
11219 10 Round to +inf
11220 11 Round to -inf
11221
11222 FLT_ROUNDS, on the other hand, expects the following:
11223 -1 Undefined
11224 0 Round to 0
11225 1 Round to nearest
11226 2 Round to +inf
11227 3 Round to -inf
11228 */
11229
11230 // Save FPC to register.
11231 SDValue Chain = Op.getOperand(0);
11232 SDValue EFPC(
11233 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11234 Chain = EFPC.getValue(1);
11235
11236 // Transform as necessary
11237 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11238 DAG.getConstant(3, dl, MVT::i32));
11239 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11240 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11241 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11242 DAG.getConstant(1, dl, MVT::i32)));
11243
11244 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11245 DAG.getConstant(1, dl, MVT::i32));
11246 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11247
11248 return DAG.getMergeValues({RetVal, Chain}, dl);
11249}
11250
11251SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11252 SelectionDAG &DAG) const {
11253 EVT VT = Op.getValueType();
11254 Op = Op.getOperand(0);
11255 EVT OpVT = Op.getValueType();
11256
11257 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11258
11259 SDLoc DL(Op);
11260
11261 // load a 0 vector for the third operand of VSUM.
11262 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11263
11264 // execute VSUM.
11265 switch (OpVT.getScalarSizeInBits()) {
11266 case 8:
11267 case 16:
11268 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11269 [[fallthrough]];
11270 case 32:
11271 case 64:
11272 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11273 DAG.getBitcast(Op.getValueType(), Zero));
11274 break;
11275 case 128:
11276 break; // VSUM over v1i128 should not happen and would be a noop
11277 default:
11278 llvm_unreachable("Unexpected scalar size.");
11279 }
11280 // Cast to original vector type, retrieve last element.
11281 return DAG.getNode(
11282 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11283 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11284}
11285
11287 FunctionType *FT = F->getFunctionType();
11288 const AttributeList &Attrs = F->getAttributes();
11289 if (Attrs.hasRetAttrs())
11290 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11291 OS << *F->getReturnType() << " @" << F->getName() << "(";
11292 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11293 if (I)
11294 OS << ", ";
11295 OS << *FT->getParamType(I);
11296 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11297 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11298 if (ArgAttrs.hasAttribute(A))
11299 OS << " " << Attribute::getNameFromAttrKind(A);
11300 }
11301 OS << ")\n";
11302}
11303
11304bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11305 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11306 if (Itr == IsInternalCache.end())
11307 Itr = IsInternalCache
11308 .insert(std::pair<const Function *, bool>(
11309 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11310 .first;
11311 return Itr->second;
11312}
11313
11314void SystemZTargetLowering::
11315verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11316 const Function *F, SDValue Callee) const {
11317 // Temporarily only do the check when explicitly requested, until it can be
11318 // enabled by default.
11320 return;
11321
11322 bool IsInternal = false;
11323 const Function *CalleeFn = nullptr;
11324 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11325 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11326 IsInternal = isInternal(CalleeFn);
11327 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11328 errs() << "ERROR: Missing extension attribute of passed "
11329 << "value in call to function:\n" << "Callee: ";
11330 if (CalleeFn != nullptr)
11331 printFunctionArgExts(CalleeFn, errs());
11332 else
11333 errs() << "-\n";
11334 errs() << "Caller: ";
11336 llvm_unreachable("");
11337 }
11338}
11339
11340void SystemZTargetLowering::
11341verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11342 const Function *F) const {
11343 // Temporarily only do the check when explicitly requested, until it can be
11344 // enabled by default.
11346 return;
11347
11348 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11349 errs() << "ERROR: Missing extension attribute of returned "
11350 << "value from function:\n";
11352 llvm_unreachable("");
11353 }
11354}
11355
11356// Verify that narrow integer arguments are extended as required by the ABI.
11357// Return false if an error is found.
11358bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11359 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11360 if (!Subtarget.isTargetELF())
11361 return true;
11362
11365 return true;
11366 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11367 return true;
11368
11369 for (unsigned i = 0; i < Outs.size(); ++i) {
11370 MVT VT = Outs[i].VT;
11371 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11372 if (VT.isInteger()) {
11373 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11374 "Unexpected integer argument VT.");
11375 if (VT == MVT::i32 &&
11376 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11377 return false;
11378 }
11379 }
11380
11381 return true;
11382}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL, unsigned MergedBits, EVT VT, SDValue Op0, SDValue Op1)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl< SDValue > &Elems, unsigned Pos)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
MachineConstantPoolValue * getMachineCPVal() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:939
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:776
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:651
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:216
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
iterator end() const
Definition StringRef.h:115
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers. Particular to z/OS when in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:471
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:470
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:997
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Define
Register definition.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:256
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:192
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:178
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:327
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:186
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
This structure is used to pass arguments to makeLibCall function.